Re: [PATCH] tracing: Fix buffer_ref pipe ops

From: Steven Rostedt
Date: Fri Apr 05 2019 - 11:19:00 EST


On Thu, 4 Apr 2019 23:59:25 +0200
Jann Horn <jannh@xxxxxxxxxx> wrote:

> This fixes multiple issues in buffer_pipe_buf_ops:
>
> - The ->steal() handler must not return zero unless the pipe buffer has
> the only reference to the page. But generic_pipe_buf_steal() assumes
> that every reference to the pipe is tracked by the page's refcount,
> which isn't true for these buffers - buffer_pipe_buf_get(), which
> duplicates a buffer, doesn't touch the page's refcount.
> Fix it by using generic_pipe_buf_nosteal(), which refuses every
> attempted theft. It should be easy to actually support ->steal, but the
> only current users of pipe_buf_steal() are the virtio console and FUSE,
> and they also only use it as an optimization. So it's probably not worth
> the effort.
> - The ->get() and ->release() handlers can be invoked concurrently on pipe
> buffers backed by the same struct buffer_ref. Make them safe against
> concurrency by using refcount_t.
> - The pointers stored in ->private were only zeroed out when the last
> reference to the buffer_ref was dropped. As far as I know, this
> shouldn't be necessary anyway, but if we do it, let's always do it.

Honestly, the entire implementation of the splice code for me was very
confusing. Not much documentation on it, so I just looked at what
others did and tried my best to copy them, bugs and all ;-)

>
> Cc: stable@xxxxxxxxxxxxxxx # v4.11
> Signed-off-by: Jann Horn <jannh@xxxxxxxxxx>
> ---
> Completely untested (apart from compiling it). I don't really know
> anything about how the tracing subsystem works.

I applied this and ran it through my suite of trace-cmd tests which
uses the splice code quite heavily, and it didn't trigger any bugs.

I don't see anything wrong with this patch, so I'll pull it and run it
through the rest of my tests.

Thanks!

-- Steve


>
> fs/splice.c | 4 ++--
> include/linux/pipe_fs_i.h | 1 +
> kernel/trace/trace.c | 28 ++++++++++++++--------------
> 3 files changed, 17 insertions(+), 16 deletions(-)
>
> diff --git a/fs/splice.c b/fs/splice.c
> index 3ee7e82df48f..e75807380caa 100644
> --- a/fs/splice.c
> +++ b/fs/splice.c
> @@ -330,8 +330,8 @@ const struct pipe_buf_operations default_pipe_buf_ops = {
> .get = generic_pipe_buf_get,
> };
>
> -static int generic_pipe_buf_nosteal(struct pipe_inode_info *pipe,
> - struct pipe_buffer *buf)
> +int generic_pipe_buf_nosteal(struct pipe_inode_info *pipe,
> + struct pipe_buffer *buf)
> {
> return 1;
> }
> diff --git a/include/linux/pipe_fs_i.h b/include/linux/pipe_fs_i.h
> index 787d224ff43e..a830e9a00eb9 100644
> --- a/include/linux/pipe_fs_i.h
> +++ b/include/linux/pipe_fs_i.h
> @@ -174,6 +174,7 @@ void free_pipe_info(struct pipe_inode_info *);
> void generic_pipe_buf_get(struct pipe_inode_info *, struct pipe_buffer *);
> int generic_pipe_buf_confirm(struct pipe_inode_info *, struct pipe_buffer *);
> int generic_pipe_buf_steal(struct pipe_inode_info *, struct pipe_buffer *);
> +int generic_pipe_buf_nosteal(struct pipe_inode_info *, struct pipe_buffer *);
> void generic_pipe_buf_release(struct pipe_inode_info *, struct pipe_buffer *);
> void pipe_buf_mark_unmergeable(struct pipe_buffer *buf);
>
> diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
> index 21153e64bf1c..0cfa13a60086 100644
> --- a/kernel/trace/trace.c
> +++ b/kernel/trace/trace.c
> @@ -7025,19 +7025,23 @@ struct buffer_ref {
> struct ring_buffer *buffer;
> void *page;
> int cpu;
> - int ref;
> + refcount_t refcount;
> };
>
> +static void buffer_ref_release(struct buffer_ref *ref)
> +{
> + if (!refcount_dec_and_test(&ref->refcount))
> + return;
> + ring_buffer_free_read_page(ref->buffer, ref->cpu, ref->page);
> + kfree(ref);
> +}
> +
> static void buffer_pipe_buf_release(struct pipe_inode_info *pipe,
> struct pipe_buffer *buf)
> {
> struct buffer_ref *ref = (struct buffer_ref *)buf->private;
>
> - if (--ref->ref)
> - return;
> -
> - ring_buffer_free_read_page(ref->buffer, ref->cpu, ref->page);
> - kfree(ref);
> + buffer_ref_release(ref);
> buf->private = 0;
> }
>
> @@ -7046,14 +7050,14 @@ static void buffer_pipe_buf_get(struct pipe_inode_info *pipe,
> {
> struct buffer_ref *ref = (struct buffer_ref *)buf->private;
>
> - ref->ref++;
> + refcount_inc(&ref->refcount);
> }
>
> /* Pipe buffer operations for a buffer. */
> static const struct pipe_buf_operations buffer_pipe_buf_ops = {
> .confirm = generic_pipe_buf_confirm,
> .release = buffer_pipe_buf_release,
> - .steal = generic_pipe_buf_steal,
> + .steal = generic_pipe_buf_nosteal,
> .get = buffer_pipe_buf_get,
> };
>
> @@ -7066,11 +7070,7 @@ static void buffer_spd_release(struct splice_pipe_desc *spd, unsigned int i)
> struct buffer_ref *ref =
> (struct buffer_ref *)spd->partial[i].private;
>
> - if (--ref->ref)
> - return;
> -
> - ring_buffer_free_read_page(ref->buffer, ref->cpu, ref->page);
> - kfree(ref);
> + buffer_ref_release(ref);
> spd->partial[i].private = 0;
> }
>
> @@ -7125,7 +7125,7 @@ tracing_buffers_splice_read(struct file *file, loff_t *ppos,
> break;
> }
>
> - ref->ref = 1;
> + refcount_set(&ref->refcount, 1);
> ref->buffer = iter->trace_buffer->buffer;
> ref->page = ring_buffer_alloc_read_page(ref->buffer, iter->cpu_file);
> if (IS_ERR(ref->page)) {