[PATCH] perf: Free AUX area pages in rcu callback

From: Alexander Shishkin
Date: Tue Jun 16 2015 - 07:14:04 EST


Currently, if the user unmaps the AUX area while the corresponding event
is active, perf_aux_output_end() may be the last one to drop the AUX
area refcount and end up freeing the pages in NMI context or in the
scheduler's fast path. The same can happen in the error path of
perf_aux_output_begin().

To avoid the bug, this patch moves the actual freeing code to
rb_free_rcu(), which determines whether it is called for the AUX area
or for the ring buffer proper and acts accordingly.

Signed-off-by: Alexander Shishkin <alexander.shishkin@xxxxxxxxxxxxxxx>
Reported-by: Vince Weaver <vincent.weaver@xxxxxxxxx>
---
kernel/events/core.c | 31 ++++++++++++++++++++++++++++++-
kernel/events/internal.h | 1 +
kernel/events/ring_buffer.c | 8 +-------
3 files changed, 32 insertions(+), 8 deletions(-)

diff --git a/kernel/events/core.c b/kernel/events/core.c
index eddf1ed415..5f1cc5976f 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -4381,7 +4381,36 @@ static void rb_free_rcu(struct rcu_head *rcu_head)
struct ring_buffer *rb;

rb = container_of(rcu_head, struct ring_buffer, rcu_head);
- rb_free(rb);
+
+ /*
+ * are we called for AUX or the rb:
+ * AUX always goes first, then if rb::refcount drops to zero,
+ * free rb synchronously
+ */
+ if (atomic_read(&rb->refcount)) {
+ __rb_free_aux(rb);
+
+ /* matches the increment in rb_free_aux() */
+ if (atomic_dec_and_test(&rb->refcount))
+ rb_free(rb);
+ } else {
+ rb_free(rb);
+ }
+}
+
+void rb_free_aux(struct ring_buffer *rb)
+{
+ /*
+ * hold rb::refcount to make sure rb doesn't disappear
+ * before aux pages are freed
+ */
+ if (WARN_ON_ONCE(!atomic_inc_not_zero(&rb->refcount)))
+ return;
+
+ if (atomic_dec_and_test(&rb->aux_refcount))
+ call_rcu(&rb->rcu_head, rb_free_rcu);
+ else
+ ring_buffer_put(rb); /* matches the increment above */
}

struct ring_buffer *ring_buffer_get(struct perf_event *event)
diff --git a/kernel/events/internal.h b/kernel/events/internal.h
index 9f6ce9ba4a..7f8242ed85 100644
--- a/kernel/events/internal.h
+++ b/kernel/events/internal.h
@@ -61,6 +61,7 @@ extern void perf_event_wakeup(struct perf_event *event);
extern int rb_alloc_aux(struct ring_buffer *rb, struct perf_event *event,
pgoff_t pgoff, int nr_pages, long watermark, int flags);
extern void rb_free_aux(struct ring_buffer *rb);
+extern void __rb_free_aux(struct ring_buffer *rb);
extern struct ring_buffer *ring_buffer_get(struct perf_event *event);
extern void ring_buffer_put(struct ring_buffer *rb);

diff --git a/kernel/events/ring_buffer.c b/kernel/events/ring_buffer.c
index 725c416085..343121e943 100644
--- a/kernel/events/ring_buffer.c
+++ b/kernel/events/ring_buffer.c
@@ -537,7 +537,7 @@ out:
return ret;
}

-static void __rb_free_aux(struct ring_buffer *rb)
+void __rb_free_aux(struct ring_buffer *rb)
{
int pg;

@@ -554,12 +554,6 @@ static void __rb_free_aux(struct ring_buffer *rb)
rb->aux_nr_pages = 0;
}

-void rb_free_aux(struct ring_buffer *rb)
-{
- if (atomic_dec_and_test(&rb->aux_refcount))
- __rb_free_aux(rb);
-}
-
#ifndef CONFIG_PERF_USE_VMALLOC

/*
--
2.1.4

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/