[tip:x86/pti] x86/events/intel/ds: Use the proper cache flush method for mapping ds buffers

From: tip-bot for Peter Zijlstra
Date: Thu Jan 04 2018 - 18:53:57 EST


Commit-ID: 42f3bdc5dd962a5958bc024c1e1444248a6b8b4a
Gitweb: https://git.kernel.org/tip/42f3bdc5dd962a5958bc024c1e1444248a6b8b4a
Author: Peter Zijlstra <peterz@xxxxxxxxxxxxx>
AuthorDate: Thu, 4 Jan 2018 18:07:12 +0100
Committer: Thomas Gleixner <tglx@xxxxxxxxxxxxx>
CommitDate: Fri, 5 Jan 2018 00:39:58 +0100

x86/events/intel/ds: Use the proper cache flush method for mapping ds buffers

Thomas reported the following warning:

BUG: using smp_processor_id() in preemptible [00000000] code: ovsdb-server/4498
caller is native_flush_tlb_single+0x57/0xc0
native_flush_tlb_single+0x57/0xc0
__set_pte_vaddr+0x2d/0x40
set_pte_vaddr+0x2f/0x40
cea_set_pte+0x30/0x40
ds_update_cea.constprop.4+0x4d/0x70
reserve_ds_buffers+0x159/0x410
x86_reserve_hardware+0x150/0x160
x86_pmu_event_init+0x3e/0x1f0
perf_try_init_event+0x69/0x80
perf_event_alloc+0x652/0x740
SyS_perf_event_open+0x3f6/0xd60
do_syscall_64+0x5c/0x190

set_pte_vaddr is used to map the ds buffers into the cpu entry area, but
there are two problems with that:

1) The resulting flush is not supposed to be called in preemptible context

2) The cpu entry area is supposed to be per CPU, but the debug store
buffers are mapped for all CPUs so these mappings need to be flushed
globally.

Add the necessary preemption protection across the mapping code and flush
TLBs globally.

Fixes: c1961a4631da ("x86/events/intel/ds: Map debug buffers in cpu_entry_area")
Reported-by: Thomas Zeitlhofer <thomas.zeitlhofer+lkml@xxxxxxxx>
Signed-off-by: Peter Zijlstra <peterz@xxxxxxxxxxxxx>
Signed-off-by: Thomas Gleixner <tglx@xxxxxxxxxxxxx>
Tested-by: Thomas Zeitlhofer <thomas.zeitlhofer+lkml@xxxxxxxx>
Cc: Greg Kroah-Hartman <gregkh@xxxxxxxxxxxxxxxxxxx>
Cc: Hugh Dickins <hughd@xxxxxxxxxx>
Cc: stable@xxxxxxxxxxxxxxx
Link: https://lkml.kernel.org/r/20180104170712.GB3040@xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx

---
arch/x86/events/intel/ds.c | 16 ++++++++++++++++
1 file changed, 16 insertions(+)

diff --git a/arch/x86/events/intel/ds.c b/arch/x86/events/intel/ds.c
index 8f0aace..8156e47 100644
--- a/arch/x86/events/intel/ds.c
+++ b/arch/x86/events/intel/ds.c
@@ -5,6 +5,7 @@

#include <asm/cpu_entry_area.h>
#include <asm/perf_event.h>
+#include <asm/tlbflush.h>
#include <asm/insn.h>

#include "../perf_event.h"
@@ -283,20 +284,35 @@ static DEFINE_PER_CPU(void *, insn_buffer);

static void ds_update_cea(void *cea, void *addr, size_t size, pgprot_t prot)
{
+ unsigned long start = (unsigned long)cea;
phys_addr_t pa;
size_t msz = 0;

pa = virt_to_phys(addr);
+
+ preempt_disable();
for (; msz < size; msz += PAGE_SIZE, pa += PAGE_SIZE, cea += PAGE_SIZE)
cea_set_pte(cea, pa, prot);
+
+ /*
+ * This is a cross-CPU update of the cpu_entry_area, we must shoot down
+ * all TLB entries for it.
+ */
+ flush_tlb_kernel_range(start, start + size);
+ preempt_enable();
}

static void ds_clear_cea(void *cea, size_t size)
{
+ unsigned long start = (unsigned long)cea;
size_t msz = 0;

+ preempt_disable();
for (; msz < size; msz += PAGE_SIZE, cea += PAGE_SIZE)
cea_set_pte(cea, 0, PAGE_NONE);
+
+ flush_tlb_kernel_range(start, start + size);
+ preempt_enable();
}

static void *dsalloc_pages(size_t size, gfp_t flags, int cpu)