Re: [PATCH v3 2/2] PM / QoS: Fix device resume latency framework

From: Rafael J. Wysocki
Date: Tue Nov 07 2017 - 05:12:22 EST


On Tue, Nov 7, 2017 at 5:33 AM, Ramesh Thomas <ramesh.thomas@xxxxxxxxx> wrote:
> On 2017-11-07 at 02:27:05 +0100, Rafael J. Wysocki wrote:
>> From: Rafael J. Wysocki <rafael.j.wysocki@xxxxxxxxx>
>>
>> The special value of 0 for device resume latency PM QoS means
>> "no restriction", but there are two problems with that.
>>
>> First, device resume latency PM QoS requests with 0 as the
>> value are always put in front of requests with positive
>> values in the priority lists used internally by the PM QoS
>> framework, causing 0 to be chosen as an effective constraint
>> value. However, that 0 is then interpreted as "no restriction"
>> effectively overriding the other requests with specific
>> restrictions which is incorrect.
>>
>> Second, the users of device resume latency PM QoS have no
>> way to specify that *any* resume latency at all should be
>> avoided, which is an artificial limitation in general.
>>
>> To address these issues, modify device resume latency PM QoS to
>> use S32_MAX as the "no constraint" value and 0 as the "no
>> latency at all" one and rework its users (the cpuidle menu
>> governor, the genpd QoS governor and the runtime PM framework)
>> to follow these changes.
>>
>> Also add a special "n/a" value to the corresponding user space I/F
>> to allow user space to indicate that it cannot accept any resume
>> latencies at all for the given device.
>>
>> Fixes: 85dc0b8a4019 (PM / QoS: Make it possible to expose PM QoS latency constraints)
>> Link: https://bugzilla.kernel.org/show_bug.cgi?id=197323
>> Reported-by: Reinette Chatre <reinette.chatre@xxxxxxxxx>
>> Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@xxxxxxxxx>
>> Tested-by: Reinette Chatre <reinette.chatre@xxxxxxxxx>
>> Tested-by: Geert Uytterhoeven <geert+renesas@xxxxxxxxx>
>> ---
>> Documentation/ABI/testing/sysfs-devices-power | 4 ++-
>> drivers/base/cpu.c | 3 +-
>> drivers/base/power/domain.c | 2 -
>> drivers/base/power/domain_governor.c | 33 ++++++++++----------------
>> drivers/base/power/qos.c | 5 +++
>> drivers/base/power/runtime.c | 2 -
>> drivers/base/power/sysfs.c | 25 ++++++++++++++++---
>> drivers/cpuidle/governors/menu.c | 4 +--
>> include/linux/pm_qos.h | 26 ++++++++++++++------
>> 9 files changed, 65 insertions(+), 39 deletions(-)
>>
>> Index: linux-pm/drivers/base/power/sysfs.c
>> ===================================================================
>> --- linux-pm.orig/drivers/base/power/sysfs.c
>> +++ linux-pm/drivers/base/power/sysfs.c
>> @@ -218,7 +218,14 @@ static ssize_t pm_qos_resume_latency_sho
>> struct device_attribute *attr,
>> char *buf)
>> {
>> - return sprintf(buf, "%d\n", dev_pm_qos_requested_resume_latency(dev));
>> + s32 value = dev_pm_qos_requested_resume_latency(dev);
>> +
>> + if (value == 0)
>> + return sprintf(buf, "n/a\n");
>> + else if (value == PM_QOS_RESUME_LATENCY_NO_CONSTRAINT)
>> + value = 0;
>> +
>> + return sprintf(buf, "%d\n", value);
>> }
>>
>> static ssize_t pm_qos_resume_latency_store(struct device *dev,
>> @@ -228,11 +235,21 @@ static ssize_t pm_qos_resume_latency_sto
>> s32 value;
>> int ret;
>>
>> - if (kstrtos32(buf, 0, &value))
>> - return -EINVAL;
>> + if (!kstrtos32(buf, 0, &value)) {
>> + /*
>> + * Prevent users from writing negative or "no constraint" values
>> + * directly.
>> + */
>> + if (value < 0 || value == PM_QOS_RESUME_LATENCY_NO_CONSTRAINT)
>> + return -EINVAL;
>>
>> - if (value < 0)
>> + if (value == 0)
>> + value = PM_QOS_RESUME_LATENCY_NO_CONSTRAINT;
>> + } else if (!strcmp(buf, "n/a") || !strcmp(buf, "n/a\n")) {
>> + value = 0;
>> + } else {
>> return -EINVAL;
>> + }
>>
>> ret = dev_pm_qos_update_request(dev->power.qos->resume_latency_req,
>> value);
>> Index: linux-pm/include/linux/pm_qos.h
>> ===================================================================
>> --- linux-pm.orig/include/linux/pm_qos.h
>> +++ linux-pm/include/linux/pm_qos.h
>> @@ -28,16 +28,19 @@ enum pm_qos_flags_status {
>> PM_QOS_FLAGS_ALL,
>> };
>>
>> -#define PM_QOS_DEFAULT_VALUE -1
>> +#define PM_QOS_DEFAULT_VALUE (-1)
>> +#define PM_QOS_LATENCY_ANY S32_MAX
>> +#define PM_QOS_LATENCY_ANY_NS ((s64)PM_QOS_LATENCY_ANY * NSEC_PER_USEC)
>>
>> #define PM_QOS_CPU_DMA_LAT_DEFAULT_VALUE (2000 * USEC_PER_SEC)
>> #define PM_QOS_NETWORK_LAT_DEFAULT_VALUE (2000 * USEC_PER_SEC)
>> #define PM_QOS_NETWORK_THROUGHPUT_DEFAULT_VALUE 0
>> #define PM_QOS_MEMORY_BANDWIDTH_DEFAULT_VALUE 0
>> -#define PM_QOS_RESUME_LATENCY_DEFAULT_VALUE 0
>> +#define PM_QOS_RESUME_LATENCY_DEFAULT_VALUE PM_QOS_LATENCY_ANY
>> +#define PM_QOS_RESUME_LATENCY_NO_CONSTRAINT PM_QOS_LATENCY_ANY
>> +#define PM_QOS_RESUME_LATENCY_NO_CONSTRAINT_NS PM_QOS_LATENCY_ANY_NS
>> #define PM_QOS_LATENCY_TOLERANCE_DEFAULT_VALUE 0
>> #define PM_QOS_LATENCY_TOLERANCE_NO_CONSTRAINT (-1)
>> -#define PM_QOS_LATENCY_ANY ((s32)(~(__u32)0 >> 1))
>>
>> #define PM_QOS_FLAG_NO_POWER_OFF (1 << 0)
>>
>> @@ -174,7 +177,8 @@ static inline s32 dev_pm_qos_requested_f
>> static inline s32 dev_pm_qos_raw_read_value(struct device *dev)
>> {
>> return IS_ERR_OR_NULL(dev->power.qos) ?
>> - 0 : pm_qos_read_value(&dev->power.qos->resume_latency);
>> + PM_QOS_RESUME_LATENCY_NO_CONSTRAINT :
>> + pm_qos_read_value(&dev->power.qos->resume_latency);
>> }
>> #else
>> static inline enum pm_qos_flags_status __dev_pm_qos_flags(struct device *dev,
>> @@ -184,9 +188,9 @@ static inline enum pm_qos_flags_status d
>> s32 mask)
>> { return PM_QOS_FLAGS_UNDEFINED; }
>> static inline s32 __dev_pm_qos_read_value(struct device *dev)
>> - { return 0; }
>> + { return PM_QOS_RESUME_LATENCY_NO_CONSTRAINT; }
>> static inline s32 dev_pm_qos_read_value(struct device *dev)
>> - { return 0; }
>> + { return PM_QOS_RESUME_LATENCY_NO_CONSTRAINT; }
>> static inline int dev_pm_qos_add_request(struct device *dev,
>> struct dev_pm_qos_request *req,
>> enum dev_pm_qos_req_type type,
>> @@ -232,9 +236,15 @@ static inline int dev_pm_qos_expose_late
>> { return 0; }
>> static inline void dev_pm_qos_hide_latency_tolerance(struct device *dev) {}
>>
>> -static inline s32 dev_pm_qos_requested_resume_latency(struct device *dev) { return 0; }
>> +static inline s32 dev_pm_qos_requested_resume_latency(struct device *dev)
>> +{
>> + return PM_QOS_RESUME_LATENCY_NO_CONSTRAINT;
>> +}
>> static inline s32 dev_pm_qos_requested_flags(struct device *dev) { return 0; }
>> -static inline s32 dev_pm_qos_raw_read_value(struct device *dev) { return 0; }
>> +static inline s32 dev_pm_qos_raw_read_value(struct device *dev)
>> +{
>> + return PM_QOS_RESUME_LATENCY_NO_CONSTRAINT;
>> +}
>> #endif
>>
>> #endif
>> Index: linux-pm/drivers/cpuidle/governors/menu.c
>> ===================================================================
>> --- linux-pm.orig/drivers/cpuidle/governors/menu.c
>> +++ linux-pm/drivers/cpuidle/governors/menu.c
>> @@ -298,8 +298,8 @@ static int menu_select(struct cpuidle_dr
>> data->needs_update = 0;
>> }
>>
>> - /* resume_latency is 0 means no restriction */
>> - if (resume_latency && resume_latency < latency_req)
>> + if (resume_latency < latency_req &&
>> + resume_latency != PM_QOS_RESUME_LATENCY_NO_CONSTRAINT)
>> latency_req = resume_latency;
>>
>> /* Special case when user has set very strict latency requirement */
>> Index: linux-pm/drivers/base/power/runtime.c
>> ===================================================================
>> --- linux-pm.orig/drivers/base/power/runtime.c
>> +++ linux-pm/drivers/base/power/runtime.c
>> @@ -253,7 +253,7 @@ static int rpm_check_suspend_allowed(str
>> || (dev->power.request_pending
>> && dev->power.request == RPM_REQ_RESUME))
>> retval = -EAGAIN;
>> - else if (__dev_pm_qos_read_value(dev) < 0)
>> + else if (__dev_pm_qos_read_value(dev) == 0)
>> retval = -EPERM;
>> else if (dev->power.runtime_status == RPM_SUSPENDED)
>> retval = 1;
>> Index: linux-pm/drivers/base/cpu.c
>> ===================================================================
>> --- linux-pm.orig/drivers/base/cpu.c
>> +++ linux-pm/drivers/base/cpu.c
>> @@ -377,7 +377,8 @@ int register_cpu(struct cpu *cpu, int nu
>>
>> per_cpu(cpu_sys_devices, num) = &cpu->dev;
>> register_cpu_under_node(num, cpu_to_node(num));
>> - dev_pm_qos_expose_latency_limit(&cpu->dev, 0);
>> + dev_pm_qos_expose_latency_limit(&cpu->dev,
>> + PM_QOS_RESUME_LATENCY_NO_CONSTRAINT);
>>
>> return 0;
>> }
>> Index: linux-pm/drivers/base/power/qos.c
>> ===================================================================
>> --- linux-pm.orig/drivers/base/power/qos.c
>> +++ linux-pm/drivers/base/power/qos.c
>> @@ -139,6 +139,9 @@ static int apply_constraint(struct dev_p
>>
>> switch(req->type) {
>> case DEV_PM_QOS_RESUME_LATENCY:
>> + if (WARN_ON(action != PM_QOS_REMOVE_REQ && value < 0))
>> + value = 0;
>> +
>> ret = pm_qos_update_target(&qos->resume_latency,
>> &req->data.pnode, action, value);
>> break;
>> @@ -189,7 +192,7 @@ static int dev_pm_qos_constraints_alloca
>> plist_head_init(&c->list);
>> c->target_value = PM_QOS_RESUME_LATENCY_DEFAULT_VALUE;
>> c->default_value = PM_QOS_RESUME_LATENCY_DEFAULT_VALUE;
>> - c->no_constraint_value = PM_QOS_RESUME_LATENCY_DEFAULT_VALUE;
>> + c->no_constraint_value = PM_QOS_RESUME_LATENCY_NO_CONSTRAINT;
>> c->type = PM_QOS_MIN;
>> c->notifiers = n;
>>
>> Index: linux-pm/Documentation/ABI/testing/sysfs-devices-power
>> ===================================================================
>> --- linux-pm.orig/Documentation/ABI/testing/sysfs-devices-power
>> +++ linux-pm/Documentation/ABI/testing/sysfs-devices-power
>> @@ -211,7 +211,9 @@ Description:
>> device, after it has been suspended at run time, from a resume
>> request to the moment the device will be ready to process I/O,
>> in microseconds. If it is equal to 0, however, this means that
>> - the PM QoS resume latency may be arbitrary.
>> + the PM QoS resume latency may be arbitrary and the special value
>> + "n/a" means that user space cannot accept any resume latency at
>> + all for the given device.
>>
>> Not all drivers support this attribute. If it isn't supported,
>> it is not present.
>> Index: linux-pm/drivers/base/power/domain.c
>> ===================================================================
>> --- linux-pm.orig/drivers/base/power/domain.c
>> +++ linux-pm/drivers/base/power/domain.c
>> @@ -1331,7 +1331,7 @@ static struct generic_pm_domain_data *ge
>>
>> gpd_data->base.dev = dev;
>> gpd_data->td.constraint_changed = true;
>> - gpd_data->td.effective_constraint_ns = 0;
>> + gpd_data->td.effective_constraint_ns = PM_QOS_RESUME_LATENCY_NO_CONSTRAINT_NS;
>> gpd_data->nb.notifier_call = genpd_dev_pm_qos_notifier;
>>
>> spin_lock_irq(&dev->power.lock);
>> Index: linux-pm/drivers/base/power/domain_governor.c
>> ===================================================================
>> --- linux-pm.orig/drivers/base/power/domain_governor.c
>> +++ linux-pm/drivers/base/power/domain_governor.c
>> @@ -33,15 +33,10 @@ static int dev_update_qos_constraint(str
>> * known at this point anyway).
>> */
>> constraint_ns = dev_pm_qos_read_value(dev);
>> - if (constraint_ns > 0)
>> - constraint_ns *= NSEC_PER_USEC;
>> + constraint_ns *= NSEC_PER_USEC;
>> }
>>
>> - /* 0 means "no constraint" */
>> - if (constraint_ns == 0)
>> - return 0;
>> -
>> - if (constraint_ns < *constraint_ns_p || *constraint_ns_p == 0)
>> + if (constraint_ns < *constraint_ns_p)
>> *constraint_ns_p = constraint_ns;
>>
>> return 0;
>> @@ -69,12 +64,12 @@ static bool default_suspend_ok(struct de
>> }
>> td->constraint_changed = false;
>> td->cached_suspend_ok = false;
>> - td->effective_constraint_ns = -1;
>> + td->effective_constraint_ns = 0;
>> constraint_ns = __dev_pm_qos_read_value(dev);
>>
>> spin_unlock_irqrestore(&dev->power.lock, flags);
>>
>> - if (constraint_ns < 0)
>> + if (constraint_ns == 0)
>> return false;
>>
>> constraint_ns *= NSEC_PER_USEC;
>> @@ -87,25 +82,25 @@ static bool default_suspend_ok(struct de
>> device_for_each_child(dev, &constraint_ns,
>> dev_update_qos_constraint);
>>
>> - if (constraint_ns == 0) {
>> + if (constraint_ns == PM_QOS_RESUME_LATENCY_NO_CONSTRAINT_NS) {
>> /* "No restriction", so the device is allowed to suspend. */
>> td->effective_constraint_ns = 0;
>
> I think you forgot to change this. Assigning 0 would cause its parent to
> fail suspend thinking this child has "no suspend at all" restriction. Should
> assign PM_QOS_RESUME_LATENCY_NO_CONSTRAINT_NS.

Yes, you are right, I'll send a v4 shortly.

> Should a macro be created for "no suspend at all" so it will be more clear?

Not really. The 0 here could just be any other value and it still
would be incorrect. :-)



>> td->cached_suspend_ok = true;
>> - } else if (constraint_ns < 0) {
>> + } else if (constraint_ns == 0) {
>> /*
>> * This triggers if one of the children that don't belong to a
>> - * domain has a negative PM QoS constraint and it's better not
>> - * to suspend then. effective_constraint_ns is negative already
>> - * and cached_suspend_ok is false, so bail out.
>> + * domain has a zero PM QoS constraint and it's better not to
>> + * suspend then. effective_constraint_ns is zero already and
>> + * cached_suspend_ok is false, so bail out.
>> */
>> return false;
>> } else {
>> constraint_ns -= td->suspend_latency_ns +
>> td->resume_latency_ns;
>> /*
>> - * effective_constraint_ns is negative already and
>> - * cached_suspend_ok is false, so if the computed value is not
>> - * positive, return right away.
>> + * effective_constraint_ns is zero already and cached_suspend_ok
>> + * is false, so if the computed value is not positive, return
>> + * right away.
>> */
>> if (constraint_ns <= 0)
>> return false;
>> @@ -177,10 +172,8 @@ static bool __default_power_down_ok(stru
>> * Negative values mean "no suspend at all" and this runs only
>> * when all devices in the domain are suspended, so it must be
>> * 0 at least.
>
> I think this message is carried from the previous patch and may need to be
> revised.
>
> In this change, negative value does not mean "no suspend at all". It also
> cannot be 0 or else it would not have suspended. It will alway be > 0 if it
> reaches here.

Right, I overlooked that too.

Thanks,
Rafael