[PATCH v12 24/31] mm: adding speculative page fault failure trace events

From: Laurent Dufour
Date: Tue Apr 16 2019 - 09:47:03 EST


This patch a set of new trace events to collect the speculative page fault
event failures.

Signed-off-by: Laurent Dufour <ldufour@xxxxxxxxxxxxx>
---
include/trace/events/pagefault.h | 80 ++++++++++++++++++++++++++++++++
mm/memory.c | 57 ++++++++++++++++++-----
2 files changed, 125 insertions(+), 12 deletions(-)
create mode 100644 include/trace/events/pagefault.h

diff --git a/include/trace/events/pagefault.h b/include/trace/events/pagefault.h
new file mode 100644
index 000000000000..d9438f3e6bad
--- /dev/null
+++ b/include/trace/events/pagefault.h
@@ -0,0 +1,80 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM pagefault
+
+#if !defined(_TRACE_PAGEFAULT_H) || defined(TRACE_HEADER_MULTI_READ)
+#define _TRACE_PAGEFAULT_H
+
+#include <linux/tracepoint.h>
+#include <linux/mm.h>
+
+DECLARE_EVENT_CLASS(spf,
+
+ TP_PROTO(unsigned long caller,
+ struct vm_area_struct *vma, unsigned long address),
+
+ TP_ARGS(caller, vma, address),
+
+ TP_STRUCT__entry(
+ __field(unsigned long, caller)
+ __field(unsigned long, vm_start)
+ __field(unsigned long, vm_end)
+ __field(unsigned long, address)
+ ),
+
+ TP_fast_assign(
+ __entry->caller = caller;
+ __entry->vm_start = vma->vm_start;
+ __entry->vm_end = vma->vm_end;
+ __entry->address = address;
+ ),
+
+ TP_printk("ip:%lx vma:%lx-%lx address:%lx",
+ __entry->caller, __entry->vm_start, __entry->vm_end,
+ __entry->address)
+);
+
+DEFINE_EVENT(spf, spf_vma_changed,
+
+ TP_PROTO(unsigned long caller,
+ struct vm_area_struct *vma, unsigned long address),
+
+ TP_ARGS(caller, vma, address)
+);
+
+DEFINE_EVENT(spf, spf_vma_noanon,
+
+ TP_PROTO(unsigned long caller,
+ struct vm_area_struct *vma, unsigned long address),
+
+ TP_ARGS(caller, vma, address)
+);
+
+DEFINE_EVENT(spf, spf_vma_notsup,
+
+ TP_PROTO(unsigned long caller,
+ struct vm_area_struct *vma, unsigned long address),
+
+ TP_ARGS(caller, vma, address)
+);
+
+DEFINE_EVENT(spf, spf_vma_access,
+
+ TP_PROTO(unsigned long caller,
+ struct vm_area_struct *vma, unsigned long address),
+
+ TP_ARGS(caller, vma, address)
+);
+
+DEFINE_EVENT(spf, spf_pmd_changed,
+
+ TP_PROTO(unsigned long caller,
+ struct vm_area_struct *vma, unsigned long address),
+
+ TP_ARGS(caller, vma, address)
+);
+
+#endif /* _TRACE_PAGEFAULT_H */
+
+/* This part must be outside protection */
+#include <trace/define_trace.h>
diff --git a/mm/memory.c b/mm/memory.c
index 1991da97e2db..509851ad7c95 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -81,6 +81,9 @@

#include "internal.h"

+#define CREATE_TRACE_POINTS
+#include <trace/events/pagefault.h>
+
#if defined(LAST_CPUPID_NOT_IN_PAGE_FLAGS) && !defined(CONFIG_COMPILE_TEST)
#warning Unfortunate NUMA and NUMA Balancing config, growing page-frame for last_cpupid.
#endif
@@ -2100,8 +2103,10 @@ static bool pte_spinlock(struct vm_fault *vmf)

again:
local_irq_disable();
- if (vma_has_changed(vmf))
+ if (vma_has_changed(vmf)) {
+ trace_spf_vma_changed(_RET_IP_, vmf->vma, vmf->address);
goto out;
+ }

#ifdef CONFIG_TRANSPARENT_HUGEPAGE
/*
@@ -2109,8 +2114,10 @@ static bool pte_spinlock(struct vm_fault *vmf)
* is not a huge collapse operation in progress in our back.
*/
pmdval = READ_ONCE(*vmf->pmd);
- if (!pmd_same(pmdval, vmf->orig_pmd))
+ if (!pmd_same(pmdval, vmf->orig_pmd)) {
+ trace_spf_pmd_changed(_RET_IP_, vmf->vma, vmf->address);
goto out;
+ }
#endif

vmf->ptl = pte_lockptr(vmf->vma->vm_mm, vmf->pmd);
@@ -2121,6 +2128,7 @@ static bool pte_spinlock(struct vm_fault *vmf)

if (vma_has_changed(vmf)) {
spin_unlock(vmf->ptl);
+ trace_spf_vma_changed(_RET_IP_, vmf->vma, vmf->address);
goto out;
}

@@ -2154,8 +2162,10 @@ static bool pte_map_lock(struct vm_fault *vmf)
*/
again:
local_irq_disable();
- if (vma_has_changed(vmf))
+ if (vma_has_changed(vmf)) {
+ trace_spf_vma_changed(_RET_IP_, vmf->vma, vmf->address);
goto out;
+ }

#ifdef CONFIG_TRANSPARENT_HUGEPAGE
/*
@@ -2163,8 +2173,10 @@ static bool pte_map_lock(struct vm_fault *vmf)
* is not a huge collapse operation in progress in our back.
*/
pmdval = READ_ONCE(*vmf->pmd);
- if (!pmd_same(pmdval, vmf->orig_pmd))
+ if (!pmd_same(pmdval, vmf->orig_pmd)) {
+ trace_spf_pmd_changed(_RET_IP_, vmf->vma, vmf->address);
goto out;
+ }
#endif

/*
@@ -2184,6 +2196,7 @@ static bool pte_map_lock(struct vm_fault *vmf)

if (vma_has_changed(vmf)) {
pte_unmap_unlock(pte, ptl);
+ trace_spf_vma_changed(_RET_IP_, vmf->vma, vmf->address);
goto out;
}

@@ -4187,47 +4200,60 @@ vm_fault_t __handle_speculative_fault(struct mm_struct *mm,

/* rmb <-> seqlock,vma_rb_erase() */
seq = raw_read_seqcount(&vma->vm_sequence);
- if (seq & 1)
+ if (seq & 1) {
+ trace_spf_vma_changed(_RET_IP_, vma, address);
goto out_put;
+ }

/*
* Can't call vm_ops service has we don't know what they would do
* with the VMA.
* This include huge page from hugetlbfs.
*/
- if (vma->vm_ops && vma->vm_ops->fault)
+ if (vma->vm_ops && vma->vm_ops->fault) {
+ trace_spf_vma_notsup(_RET_IP_, vma, address);
goto out_put;
+ }

/*
* __anon_vma_prepare() requires the mmap_sem to be held
* because vm_next and vm_prev must be safe. This can't be guaranteed
* in the speculative path.
*/
- if (unlikely(!vma->anon_vma))
+ if (unlikely(!vma->anon_vma)) {
+ trace_spf_vma_notsup(_RET_IP_, vma, address);
goto out_put;
+ }

vmf.vma_flags = READ_ONCE(vma->vm_flags);
vmf.vma_page_prot = READ_ONCE(vma->vm_page_prot);

/* Can't call userland page fault handler in the speculative path */
- if (unlikely(vmf.vma_flags & VM_UFFD_MISSING))
+ if (unlikely(vmf.vma_flags & VM_UFFD_MISSING)) {
+ trace_spf_vma_notsup(_RET_IP_, vma, address);
goto out_put;
+ }

- if (vmf.vma_flags & VM_GROWSDOWN || vmf.vma_flags & VM_GROWSUP)
+ if (vmf.vma_flags & VM_GROWSDOWN || vmf.vma_flags & VM_GROWSUP) {
/*
* This could be detected by the check address against VMA's
* boundaries but we want to trace it as not supported instead
* of changed.
*/
+ trace_spf_vma_notsup(_RET_IP_, vma, address);
goto out_put;
+ }

if (address < READ_ONCE(vma->vm_start)
- || READ_ONCE(vma->vm_end) <= address)
+ || READ_ONCE(vma->vm_end) <= address) {
+ trace_spf_vma_changed(_RET_IP_, vma, address);
goto out_put;
+ }

if (!arch_vma_access_permitted(vma, flags & FAULT_FLAG_WRITE,
flags & FAULT_FLAG_INSTRUCTION,
flags & FAULT_FLAG_REMOTE)) {
+ trace_spf_vma_access(_RET_IP_, vma, address);
ret = VM_FAULT_SIGSEGV;
goto out_put;
}
@@ -4235,10 +4261,12 @@ vm_fault_t __handle_speculative_fault(struct mm_struct *mm,
/* This is one is required to check that the VMA has write access set */
if (flags & FAULT_FLAG_WRITE) {
if (unlikely(!(vmf.vma_flags & VM_WRITE))) {
+ trace_spf_vma_access(_RET_IP_, vma, address);
ret = VM_FAULT_SIGSEGV;
goto out_put;
}
} else if (unlikely(!(vmf.vma_flags & (VM_READ|VM_EXEC|VM_WRITE)))) {
+ trace_spf_vma_access(_RET_IP_, vma, address);
ret = VM_FAULT_SIGSEGV;
goto out_put;
}
@@ -4252,8 +4280,10 @@ vm_fault_t __handle_speculative_fault(struct mm_struct *mm,
pol = __get_vma_policy(vma, address);
if (!pol)
pol = get_task_policy(current);
- if (pol && pol->mode == MPOL_INTERLEAVE)
+ if (pol && pol->mode == MPOL_INTERLEAVE) {
+ trace_spf_vma_notsup(_RET_IP_, vma, address);
goto out_put;
+ }
#endif

/*
@@ -4326,8 +4356,10 @@ vm_fault_t __handle_speculative_fault(struct mm_struct *mm,
* We need to re-validate the VMA after checking the bounds, otherwise
* we might have a false positive on the bounds.
*/
- if (read_seqcount_retry(&vma->vm_sequence, seq))
+ if (read_seqcount_retry(&vma->vm_sequence, seq)) {
+ trace_spf_vma_changed(_RET_IP_, vma, address);
goto out_put;
+ }

mem_cgroup_enter_user_fault();
ret = handle_pte_fault(&vmf);
@@ -4346,6 +4378,7 @@ vm_fault_t __handle_speculative_fault(struct mm_struct *mm,
return ret;

out_walk:
+ trace_spf_vma_notsup(_RET_IP_, vma, address);
local_irq_enable();
out_put:
put_vma(vma);
--
2.21.0