[RFC PATCH v1 2/3] Make calls to account_scaled_stats

From: Vaidyanathan Srinivasan
Date: Mon May 26 2008 - 10:30:41 EST


Hook various accounting functions to call scaled stats

* Hook porcess contect switch: __switch_to()
* Hook IRQ handling account_system_vtime() in hardirq.hA
* Update __delayacct_add_tsk() to take care of scaling by 1000
* Update bacct_add_tsk() to take care of scaling by 1000

Signed-off-by: Amit K. Arora <aarora@xxxxxxxxxxxxxxxxxx>
Signed-off-by: Vaidyanathan Srinivasan <svaidy@xxxxxxxxxxxxxxxxxx>
---

arch/x86/kernel/process_32.c | 8 ++++++++
include/linux/hardirq.h | 4 ++++
kernel/delayacct.c | 7 ++++++-
kernel/timer.c | 2 --
kernel/tsacct.c | 10 ++++++++--
5 files changed, 26 insertions(+), 5 deletions(-)

diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c
index f8476df..c81a783 100644
--- a/arch/x86/kernel/process_32.c
+++ b/arch/x86/kernel/process_32.c
@@ -56,6 +56,9 @@
#include <asm/cpu.h>
#include <asm/kdebug.h>

+extern void account_scaled_stats(struct task_struct *tsk);
+extern void reset_for_scaled_stats(struct task_struct *tsk);
+
asmlinkage void ret_from_fork(void) __asm__("ret_from_fork");

static int hlt_counter;
@@ -660,6 +663,11 @@ struct task_struct * __switch_to(struct task_struct *prev_p, struct task_struct
loadsegment(gs, next->gs);

x86_write_percpu(current_task, next_p);
+ /* Account scaled statistics for the task leaving CPU */
+ account_scaled_stats(prev_p);
+ barrier();
+ /* Initialise stats counter for new task */
+ reset_for_scaled_stats(next_p);

return prev_p;
}
diff --git a/include/linux/hardirq.h b/include/linux/hardirq.h
index 181006c..4458736 100644
--- a/include/linux/hardirq.h
+++ b/include/linux/hardirq.h
@@ -7,6 +7,9 @@
#include <asm/hardirq.h>
#include <asm/system.h>

+/* TBD: Add config option */
+extern void account_scaled_stats(struct task_struct *tsk);
+
/*
* We put the hardirq and softirq counter into the preemption
* counter. The bitmask has the following meaning:
@@ -115,6 +118,7 @@ struct task_struct;
#ifndef CONFIG_VIRT_CPU_ACCOUNTING
static inline void account_system_vtime(struct task_struct *tsk)
{
+ account_scaled_stats(tsk);
}
#endif

diff --git a/kernel/delayacct.c b/kernel/delayacct.c
index 10e43fd..3e2938f 100644
--- a/kernel/delayacct.c
+++ b/kernel/delayacct.c
@@ -117,7 +117,12 @@ int __delayacct_add_tsk(struct taskstats *d, struct task_struct *tsk)

tmp = (s64)d->cpu_scaled_run_real_total;
cputime_to_timespec(tsk->utimescaled + tsk->stimescaled, &ts);
- tmp += timespec_to_ns(&ts);
+ /* HACK: Remember, we multipled the cputime_t by 1000 to include
+ * fraction. Now it is time to scale it back to correct 'ns' value.
+ * Perhaps, we should use nano second unit (u64 type) for utimescaled
+ * and stimescaled?
+ */
+ tmp += div_s64(timespec_to_ns(&ts),1000);
d->cpu_scaled_run_real_total =
(tmp < (s64)d->cpu_scaled_run_real_total) ? 0 : tmp;

diff --git a/kernel/timer.c b/kernel/timer.c
index ceacc66..de8a615 100644
--- a/kernel/timer.c
+++ b/kernel/timer.c
@@ -964,10 +964,8 @@ void account_process_tick(struct task_struct *p, int user_tick)

if (user_tick) {
account_user_time(p, one_jiffy);
- account_user_time_scaled(p, cputime_to_scaled(one_jiffy));
} else {
account_system_time(p, HARDIRQ_OFFSET, one_jiffy);
- account_system_time_scaled(p, cputime_to_scaled(one_jiffy));
}
}
#endif
diff --git a/kernel/tsacct.c b/kernel/tsacct.c
index 4ab1b58..ee0d93b 100644
--- a/kernel/tsacct.c
+++ b/kernel/tsacct.c
@@ -62,10 +62,16 @@ void bacct_add_tsk(struct taskstats *stats, struct task_struct *tsk)
rcu_read_unlock();
stats->ac_utime = cputime_to_msecs(tsk->utime) * USEC_PER_MSEC;
stats->ac_stime = cputime_to_msecs(tsk->stime) * USEC_PER_MSEC;
+ /* HACK: cputime unit is jiffies on x86 and not good for fractional
+ * additional. cputime_t type {u,s}timescaled is multiplied by
+ * 1000 for scaled accounting. Hence, cputime_to_msecs will actually
+ * give the required micro second value. The multiplier
+ * USEC_PER_MSEC has been dropped.
+ */
stats->ac_utimescaled =
- cputime_to_msecs(tsk->utimescaled) * USEC_PER_MSEC;
+ cputime_to_msecs(tsk->utimescaled);
stats->ac_stimescaled =
- cputime_to_msecs(tsk->stimescaled) * USEC_PER_MSEC;
+ cputime_to_msecs(tsk->stimescaled);
stats->ac_minflt = tsk->min_flt;
stats->ac_majflt = tsk->maj_flt;


--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/