[PATCH 1/1] cputime: Make the reported utime+stime correspond to the actual runtime.

From: Fredrik Markstrom
Date: Fri Jun 12 2015 - 04:57:55 EST


The scaling mechanism might sometimes cause top to report >100%
(sometimes >1000%) CPU usage for a single thread. This patch makes
sure that stime+utime corresponds to the actual runtime of the thread.

Signed-off-by: Fredrik Markstrom <fredrik.markstrom@xxxxxxxxx>
---
kernel/sched/cputime.c | 46 +++++++++++++++++++---------------------------
1 file changed, 19 insertions(+), 27 deletions(-)

diff --git a/kernel/sched/cputime.c b/kernel/sched/cputime.c
index f5a64ff..2d168c8 100644
--- a/kernel/sched/cputime.c
+++ b/kernel/sched/cputime.c
@@ -554,22 +554,7 @@ drop_precision:
return (__force cputime_t) scaled;
}

-/*
- * Atomically advance counter to the new value. Interrupts, vcpu
- * scheduling, and scaling inaccuracies can cause cputime_advance
- * to be occasionally called with a new value smaller than counter.
- * Let's enforce atomicity.
- *
- * Normally a caller will only go through this loop once, or not
- * at all in case a previous caller updated counter the same jiffy.
- */
-static void cputime_advance(cputime_t *counter, cputime_t new)
-{
- cputime_t old;
-
- while (new > (old = READ_ONCE(*counter)))
- cmpxchg_cputime(counter, old, new);
-}
+static DEFINE_SPINLOCK(prev_time_lock);

/*
* Adjust tick based cputime random precision against scheduler
@@ -590,17 +575,11 @@ static void cputime_adjust(struct task_cputime *curr,
*
* Fix this by scaling these tick based values against the total
* runtime accounted by the CFS scheduler.
+ * In addition, make sure the reported stime+utime equals rtime
+ * so that the total runtime reported is correct.
*/
rtime = nsecs_to_cputime(curr->sum_exec_runtime);

- /*
- * Update userspace visible utime/stime values only if actual execution
- * time is bigger than already exported. Note that can happen, that we
- * provided bigger values due to scaling inaccuracy on big numbers.
- */
- if (prev->stime + prev->utime >= rtime)
- goto out;
-
stime = curr->stime;
utime = curr->utime;

@@ -616,12 +595,25 @@ static void cputime_adjust(struct task_cputime *curr,
utime = rtime - stime;
}

- cputime_advance(&prev->stime, stime);
- cputime_advance(&prev->utime, utime);
+ spin_lock(&prev_time_lock);
+ if (stime < prev->stime) {
+ stime = prev->stime;
+ utime = rtime - stime;
+ } else if (utime < prev->utime) {
+ utime = prev->utime;
+ stime = rtime - utime;
+ }
+ WARN_ON(stime < prev->stime);
+ WARN_ON(utime < prev->utime);
+ WARN_ON(stime + utime != rtime);

-out:
+ if (prev->stime + prev->utime < rtime) {
+ prev->stime = stime;
+ prev->utime = utime;
+ }
*ut = prev->utime;
*st = prev->stime;
+ spin_unlock(&prev_time_lock);
}

void task_cputime_adjusted(struct task_struct *p, cputime_t *ut, cputime_t *st)
--
1.9.1

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/