[PATCH] /proc/stat: add major page faults time accounting

From: Ruslan Ruslichenko
Date: Mon Aug 14 2017 - 05:07:30 EST


Add accounting of the time spent by CPUs handling major
page faults (those which caused disk I/O).
This may be needed to detect a system thrashing situation,
in which the system does not have enough memory for the
working set and has to constantly page pages in and out of
the page cache, with no ability to do anything useful for
the rest of the time.

Signed-off-by: Ruslan Ruslichenko <rruslich@xxxxxxxxx>
---
fs/proc/stat.c | 8 ++++++--
include/linux/kernel_stat.h | 1 +
mm/memory.c | 5 +++++
3 files changed, 12 insertions(+), 2 deletions(-)

diff --git a/fs/proc/stat.c b/fs/proc/stat.c
index 510413eb..d183aa9 100644
--- a/fs/proc/stat.c
+++ b/fs/proc/stat.c
@@ -81,7 +81,7 @@ static int show_stat(struct seq_file *p, void *v)
{
int i, j;
unsigned long jif;
- u64 user, nice, system, idle, iowait, irq, softirq, steal;
+ u64 user, nice, system, idle, iowait, irq, softirq, steal, pgmajflts;
u64 guest, guest_nice;
u64 sum = 0;
u64 sum_softirq = 0;
@@ -89,7 +89,7 @@ static int show_stat(struct seq_file *p, void *v)
struct timespec boottime;

user = nice = system = idle = iowait =
- irq = softirq = steal = 0;
+ irq = softirq = steal = pgmajflts = 0;
guest = guest_nice = 0;
getboottime(&boottime);
jif = boottime.tv_sec;
@@ -105,6 +105,7 @@ static int show_stat(struct seq_file *p, void *v)
steal += kcpustat_cpu(i).cpustat[CPUTIME_STEAL];
guest += kcpustat_cpu(i).cpustat[CPUTIME_GUEST];
guest_nice += kcpustat_cpu(i).cpustat[CPUTIME_GUEST_NICE];
+ pgmajflts += kcpustat_cpu(i).cpustat[CPUTIME_PGMAJFAULT];
sum += kstat_cpu_irqs_sum(i);
sum += arch_irq_stat_cpu(i);

@@ -128,6 +129,7 @@ static int show_stat(struct seq_file *p, void *v)
seq_put_decimal_ull(p, ' ', cputime64_to_clock_t(steal));
seq_put_decimal_ull(p, ' ', cputime64_to_clock_t(guest));
seq_put_decimal_ull(p, ' ', cputime64_to_clock_t(guest_nice));
+ seq_put_decimal_ull(p, ' ', cputime64_to_clock_t(pgmajflts));
seq_putc(p, '\n');

for_each_online_cpu(i) {
@@ -142,6 +144,7 @@ static int show_stat(struct seq_file *p, void *v)
steal = kcpustat_cpu(i).cpustat[CPUTIME_STEAL];
guest = kcpustat_cpu(i).cpustat[CPUTIME_GUEST];
guest_nice = kcpustat_cpu(i).cpustat[CPUTIME_GUEST_NICE];
+ pgmajflts = kcpustat_cpu(i).cpustat[CPUTIME_PGMAJFAULT];
seq_printf(p, "cpu%d", i);
seq_put_decimal_ull(p, ' ', cputime64_to_clock_t(user));
seq_put_decimal_ull(p, ' ', cputime64_to_clock_t(nice));
@@ -153,6 +156,7 @@ static int show_stat(struct seq_file *p, void *v)
seq_put_decimal_ull(p, ' ', cputime64_to_clock_t(steal));
seq_put_decimal_ull(p, ' ', cputime64_to_clock_t(guest));
seq_put_decimal_ull(p, ' ', cputime64_to_clock_t(guest_nice));
+ seq_put_decimal_ull(p, ' ', cputime64_to_clock_t(pgmajflts));
seq_putc(p, '\n');
}
seq_printf(p, "intr %llu", (unsigned long long)sum);
diff --git a/include/linux/kernel_stat.h b/include/linux/kernel_stat.h
index 25a822f..1503706 100644
--- a/include/linux/kernel_stat.h
+++ b/include/linux/kernel_stat.h
@@ -28,6 +28,7 @@ enum cpu_usage_stat {
CPUTIME_STEAL,
CPUTIME_GUEST,
CPUTIME_GUEST_NICE,
+ CPUTIME_PGMAJFAULT,
NR_STATS,
};

diff --git a/mm/memory.c b/mm/memory.c
index 692cef8..8a40645 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -3437,6 +3437,8 @@ int handle_mm_fault(struct mm_struct *mm, struct vm_area_struct *vma,
unsigned long address, unsigned int flags)
{
int ret;
+ unsigned long start_time = current->stime;
+ u64 *cpustat = kcpustat_this_cpu->cpustat;

__set_current_state(TASK_RUNNING);

@@ -3467,6 +3469,9 @@ int handle_mm_fault(struct mm_struct *mm, struct vm_area_struct *vma,
mem_cgroup_oom_synchronize(false);
}

+ if (ret & VM_FAULT_MAJOR)
+ cpustat[CPUTIME_PGMAJFAULT] += current->stime - start_time;
+
return ret;
}
EXPORT_SYMBOL_GPL(handle_mm_fault);
--
1.9.1


--------------9B27BB3AED4106EF45BCD875--