Re: [PATCH V3] thermal: Add cooling device's statistics in sysfs

From: Eduardo Valentin
Date: Fri Jan 12 2018 - 12:46:13 EST


Hello,

On Thu, Jan 11, 2018 at 03:06:09PM +0530, Viresh Kumar wrote:
> This extends the sysfs interface for thermal cooling devices and exposes
> some pretty useful statistics. These statistics have proven to be quite
> useful specially while doing benchmarks related to the task scheduler,
> where we want to make sure that nothing has disrupted the test,
> specially the cooling device which may have put constraints on the CPUs.
> The information exposed here tells us to what extent the CPUs were
> constrained by the thermal framework.
>
> The read-only "total_trans" file shows the total number of cooling state
> transitions the device has gone through since the time the cooling
> device is registered or the time when statistics were reset last.

It would be good to properly describe what total_trans means. Also, to
have this documented in the thermal sysfs file. Does it mean how many
times the cooling device has been set to a specific state? Or how many
times the state has been changed to that specific value?

>
> The read-only "time_in_state_ms" file shows the time spent by the device
> in the respective cooling states.

What about this file use the same unit as the cpufreq equivalent? IIRC,
the cpufreq node used clock_t.

>
> The write-only "reset" file is used to reset the statistics.
>

cool.

> This is how the directory structure looks like for a single cooling
> device:
>
> $ ls -R /sys/class/thermal/cooling_device0/
> /sys/class/thermal/cooling_device0/:
> cur_state max_state power stats subsystem type uevent
>
> /sys/class/thermal/cooling_device0/power:
> autosuspend_delay_ms runtime_active_time runtime_suspended_time
> control runtime_status
>
> /sys/class/thermal/cooling_device0/stats:
> reset time_in_state_ms total_trans
>

I guess the only missing node from the cpufreq design copy was the
transition table.

Also, I think the stats per thermal zone is also useful, sometimes
more than per cooling device, as I have been using in
the past, but that is just a comment, not really to block your patch.

> This is tested on ARM 32-bit Hisilicon hikey620 board running Ubuntu and
> ARM 64-bit Hisilicon hikey960 board running Android.
>
> Signed-off-by: Viresh Kumar <viresh.kumar@xxxxxxxxxx>
> ---
> V2->V3:
> - Total number of states is max_level + 1. The earlier version didn't
> take that into account and so the stats for the highest state were
> missing.
>
> V1->V2:
> - Move to sysfs from debugfs
>
> drivers/thermal/thermal_core.c | 3 +-
> drivers/thermal/thermal_core.h | 3 +
> drivers/thermal/thermal_helpers.c | 5 +-
> drivers/thermal/thermal_sysfs.c | 146 ++++++++++++++++++++++++++++++++++++++
> include/linux/thermal.h | 1 +
> 5 files changed, 156 insertions(+), 2 deletions(-)
>
> diff --git a/drivers/thermal/thermal_core.c b/drivers/thermal/thermal_core.c
> index 2b1b0ba393a4..2cc4ae57484e 100644
> --- a/drivers/thermal/thermal_core.c
> +++ b/drivers/thermal/thermal_core.c
> @@ -972,8 +972,8 @@ __thermal_cooling_device_register(struct device_node *np,
> cdev->ops = ops;
> cdev->updated = false;
> cdev->device.class = &thermal_class;
> - thermal_cooling_device_setup_sysfs(cdev);
> cdev->devdata = devdata;
> + thermal_cooling_device_setup_sysfs(cdev);
> dev_set_name(&cdev->device, "cooling_device%d", cdev->id);
> result = device_register(&cdev->device);
> if (result) {
> @@ -1106,6 +1106,7 @@ void thermal_cooling_device_unregister(struct thermal_cooling_device *cdev)
>
> ida_simple_remove(&thermal_cdev_ida, cdev->id);
> device_unregister(&cdev->device);
> + thermal_cooling_device_remove_sysfs(cdev);
> }
> EXPORT_SYMBOL_GPL(thermal_cooling_device_unregister);
>
> diff --git a/drivers/thermal/thermal_core.h b/drivers/thermal/thermal_core.h
> index 27e3b1df7360..f6eb01e99816 100644
> --- a/drivers/thermal/thermal_core.h
> +++ b/drivers/thermal/thermal_core.h
> @@ -73,6 +73,9 @@ int thermal_build_list_of_policies(char *buf);
> int thermal_zone_create_device_groups(struct thermal_zone_device *, int);
> void thermal_zone_destroy_device_groups(struct thermal_zone_device *);
> void thermal_cooling_device_setup_sysfs(struct thermal_cooling_device *);
> +void thermal_cooling_device_remove_sysfs(struct thermal_cooling_device *cdev);
> +void thermal_cooling_device_stats_update(struct thermal_cooling_device *cdev,
> + unsigned long new_state);
> /* used only at binding time */
> ssize_t
> thermal_cooling_device_trip_point_show(struct device *,
> diff --git a/drivers/thermal/thermal_helpers.c b/drivers/thermal/thermal_helpers.c
> index 8cdf75adcce1..eb03d7e099bb 100644
> --- a/drivers/thermal/thermal_helpers.c
> +++ b/drivers/thermal/thermal_helpers.c
> @@ -187,7 +187,10 @@ void thermal_cdev_update(struct thermal_cooling_device *cdev)
> if (instance->target > target)
> target = instance->target;
> }
> - cdev->ops->set_cur_state(cdev, target);
> +
> + if (!cdev->ops->set_cur_state(cdev, target))
> + thermal_cooling_device_stats_update(cdev, target);
> +

I might be wrong but, this will maybe double account for cases the same
cooling state is selected.

> cdev->updated = true;
> mutex_unlock(&cdev->lock);
> trace_cdev_update(cdev, target);
> diff --git a/drivers/thermal/thermal_sysfs.c b/drivers/thermal/thermal_sysfs.c
> index ba81c9080f6e..88bb26d5d375 100644
> --- a/drivers/thermal/thermal_sysfs.c
> +++ b/drivers/thermal/thermal_sysfs.c
> @@ -666,6 +666,121 @@ void thermal_zone_destroy_device_groups(struct thermal_zone_device *tz)
> }
>
> /* sys I/F for cooling device */
> +struct cooling_dev_stats {
> + spinlock_t lock;
> + unsigned int total_trans;
> + unsigned long state;
> + unsigned long max_states;
> + ktime_t last_time;
> + ktime_t *time_in_state;
> +};
> +
> +static void update_time_in_state(struct cooling_dev_stats *stats)
> +{
> + ktime_t now = ktime_get(), delta;
> +
> + delta = ktime_sub(now, stats->last_time);
> + stats->time_in_state[stats->state] =
> + ktime_add(stats->time_in_state[stats->state], delta);
> + stats->last_time = now;
> +}
> +
> +void thermal_cooling_device_stats_update(struct thermal_cooling_device *cdev,
> + unsigned long new_state)
> +{
> + struct cooling_dev_stats *stats = cdev->stats;
> +
> + spin_lock(&stats->lock);
> +
> + if (stats->state == new_state)
> + goto unlock;
> +
> + update_time_in_state(stats);
> + stats->state = new_state;
> + stats->total_trans++;
> +
> +unlock:
> + spin_unlock(&stats->lock);
> +}
> +
> +static ssize_t
> +thermal_cooling_device_total_trans_show(struct device *dev,
> + struct device_attribute *attr,
> + char *buf)
> +{
> + struct thermal_cooling_device *cdev = to_cooling_device(dev);
> + struct cooling_dev_stats *stats = cdev->stats;
> + int ret;
> +
> + spin_lock(&stats->lock);
> + ret = sprintf(buf, "%u\n", stats->total_trans);
> + spin_unlock(&stats->lock);
> +
> + return ret;
> +}
> +
> +static ssize_t
> +thermal_cooling_device_time_in_state_show(struct device *dev,
> + struct device_attribute *attr,
> + char *buf)
> +{
> + struct thermal_cooling_device *cdev = to_cooling_device(dev);
> + struct cooling_dev_stats *stats = cdev->stats;
> + ssize_t len = 0;
> + int i;
> +
> + spin_lock(&stats->lock);
> + update_time_in_state(stats);
> +
> + for (i = 0; i < stats->max_states; i++) {
> + len += sprintf(buf + len, "state%u\t%llu\n", i,
> + ktime_to_ms(stats->time_in_state[i]));
> + }
> + spin_unlock(&stats->lock);
> +
> + return len;
> +}
> +
> +static ssize_t
> +thermal_cooling_device_reset_store(struct device *dev,
> + struct device_attribute *attr,
> + const char *buf, size_t count)
> +{
> + struct thermal_cooling_device *cdev = to_cooling_device(dev);
> + struct cooling_dev_stats *stats = cdev->stats;
> + int i;
> +
> + spin_lock(&stats->lock);
> +
> + stats->total_trans = 0;
> + stats->last_time = ktime_get();

So, this is ktime. Then again, might make sense to follow cpufreq unit.

Once again, this needs to go into the documentation, regardless of the
unit we end up with.

> +
> + for (i = 0; i < stats->max_states; i++)
> + stats->time_in_state[i] = ktime_set(0, 0);
> +
> + spin_unlock(&stats->lock);
> +
> + return count;
> +}
> +
> +static DEVICE_ATTR(total_trans, 0444, thermal_cooling_device_total_trans_show,
> + NULL);
> +static DEVICE_ATTR(time_in_state_ms, 0444,
> + thermal_cooling_device_time_in_state_show, NULL);
> +static DEVICE_ATTR(reset, 0200, NULL, thermal_cooling_device_reset_store);
> +
> +static struct attribute *cooling_device_stats_attrs[] = {
> + &dev_attr_total_trans.attr,
> + &dev_attr_time_in_state_ms.attr,
> + &dev_attr_reset.attr,
> + NULL
> +};
> +
> +static const struct attribute_group cooling_device_stats_attr_group = {
> + .attrs = cooling_device_stats_attrs,
> + .name = "stats"
> +};
> +
> static ssize_t
> thermal_cooling_device_type_show(struct device *dev,
> struct device_attribute *attr, char *buf)
> @@ -721,6 +836,7 @@ thermal_cooling_device_cur_state_store(struct device *dev,
> result = cdev->ops->set_cur_state(cdev, state);
> if (result)
> return result;
> + thermal_cooling_device_stats_update(cdev, state);
> return count;
> }
>
> @@ -745,14 +861,44 @@ static const struct attribute_group cooling_device_attr_group = {
>
> static const struct attribute_group *cooling_device_attr_groups[] = {
> &cooling_device_attr_group,
> + &cooling_device_stats_attr_group,
> NULL,
> };
>
> void thermal_cooling_device_setup_sysfs(struct thermal_cooling_device *cdev)
> {
> + struct cooling_dev_stats *stats;
> + unsigned long states;
> + int ret, size;
> +
> + ret = cdev->ops->get_max_state(cdev, &states);
> + if (ret)
> + return;
> +
> + states++; /* Total number of states is highest state + 1 */
> + size = sizeof(*stats);
> + size += sizeof(*stats->time_in_state) * states;
> +
> + stats = kzalloc(size, GFP_KERNEL);
> + if (!stats)
> + return;
> +
> + stats->time_in_state = (ktime_t *)(stats + 1);
> + cdev->stats = stats;
> + stats->last_time = ktime_get();
> + stats->max_states = states;
> + cdev->stats = stats;
> +
> + spin_lock_init(&stats->lock);

I would say, the cooling device stat initialization deserves its own
initialization function, don't you think?


Also, I see nothing about sysfs on the lines added to
thermal_cooling_device_setup_sysfs().

> cdev->device.groups = cooling_device_attr_groups;
> }
>
> +void thermal_cooling_device_remove_sysfs(struct thermal_cooling_device *cdev)
> +{
> + kfree(cdev->stats);
> + cdev->stats = NULL;
> +}
> +
> /* these helper will be used only at the time of bindig */
> ssize_t
> thermal_cooling_device_trip_point_show(struct device *dev,
> diff --git a/include/linux/thermal.h b/include/linux/thermal.h
> index 8c5302374eaa..7834be668d80 100644
> --- a/include/linux/thermal.h
> +++ b/include/linux/thermal.h
> @@ -148,6 +148,7 @@ struct thermal_cooling_device {
> struct device device;
> struct device_node *np;
> void *devdata;
> + void *stats;
> const struct thermal_cooling_device_ops *ops;
> bool updated; /* true if the cooling device does not need update */
> struct mutex lock; /* protect thermal_instances list */
> --
> 2.15.0.194.g9af6a3dea062
>