[PATCH 23/23] fs: don't allow splice read/write without explicit ops

From: Christoph Hellwig
Date: Tue Jul 07 2020 - 13:57:29 EST


Don't allow calling ->read or ->write with set_fs as a preparation for
killing off set_fs. While I've not triggered any of these cases in my
setups, as all the usual suspects (file systems, pipes, sockets, block
devices, system character devices) use the iter ops, this is almost
guaranteed to eventually break something, so print a detailed error
message to help debug such cases. The fix will be to switch the
affected driver to use the iter ops.

Signed-off-by: Christoph Hellwig <hch@xxxxxx>
---
fs/read_write.c | 2 +-
fs/splice.c | 121 ++++-----------------------------------------
include/linux/fs.h | 2 -
3 files changed, 10 insertions(+), 115 deletions(-)

diff --git a/fs/read_write.c b/fs/read_write.c
index 8d8113ae8561e6..c33182f97d1ef0 100644
--- a/fs/read_write.c
+++ b/fs/read_write.c
@@ -1077,7 +1077,7 @@ ssize_t vfs_iter_write(struct file *file, struct iov_iter *iter, loff_t *ppos,
}
EXPORT_SYMBOL(vfs_iter_write);

-ssize_t vfs_readv(struct file *file, const struct iovec __user *vec,
+static ssize_t vfs_readv(struct file *file, const struct iovec __user *vec,
unsigned long vlen, loff_t *pos, rwf_t flags)
{
struct iovec iovstack[UIO_FASTIOV];
diff --git a/fs/splice.c b/fs/splice.c
index 52485158023778..3ceaaf3b8c122c 100644
--- a/fs/splice.c
+++ b/fs/splice.c
@@ -342,89 +342,6 @@ const struct pipe_buf_operations nosteal_pipe_buf_ops = {
};
EXPORT_SYMBOL(nosteal_pipe_buf_ops);

-static ssize_t kernel_readv(struct file *file, const struct kvec *vec,
- unsigned long vlen, loff_t offset)
-{
- mm_segment_t old_fs;
- loff_t pos = offset;
- ssize_t res;
-
- old_fs = get_fs();
- set_fs(KERNEL_DS);
- /* The cast to a user pointer is valid due to the set_fs() */
- res = vfs_readv(file, (const struct iovec __user *)vec, vlen, &pos, 0);
- set_fs(old_fs);
-
- return res;
-}
-
-static ssize_t default_file_splice_read(struct file *in, loff_t *ppos,
- struct pipe_inode_info *pipe, size_t len,
- unsigned int flags)
-{
- struct kvec *vec, __vec[PIPE_DEF_BUFFERS];
- struct iov_iter to;
- struct page **pages;
- unsigned int nr_pages;
- unsigned int mask;
- size_t offset, base, copied = 0;
- ssize_t res;
- int i;
-
- if (pipe_full(pipe->head, pipe->tail, pipe->max_usage))
- return -EAGAIN;
-
- /*
- * Try to keep page boundaries matching to source pagecache ones -
- * it probably won't be much help, but...
- */
- offset = *ppos & ~PAGE_MASK;
-
- iov_iter_pipe(&to, READ, pipe, len + offset);
-
- res = iov_iter_get_pages_alloc(&to, &pages, len + offset, &base);
- if (res <= 0)
- return -ENOMEM;
-
- nr_pages = DIV_ROUND_UP(res + base, PAGE_SIZE);
-
- vec = __vec;
- if (nr_pages > PIPE_DEF_BUFFERS) {
- vec = kmalloc_array(nr_pages, sizeof(struct kvec), GFP_KERNEL);
- if (unlikely(!vec)) {
- res = -ENOMEM;
- goto out;
- }
- }
-
- mask = pipe->ring_size - 1;
- pipe->bufs[to.head & mask].offset = offset;
- pipe->bufs[to.head & mask].len -= offset;
-
- for (i = 0; i < nr_pages; i++) {
- size_t this_len = min_t(size_t, len, PAGE_SIZE - offset);
- vec[i].iov_base = page_address(pages[i]) + offset;
- vec[i].iov_len = this_len;
- len -= this_len;
- offset = 0;
- }
-
- res = kernel_readv(in, vec, nr_pages, *ppos);
- if (res > 0) {
- copied = res;
- *ppos += res;
- }
-
- if (vec != __vec)
- kfree(vec);
-out:
- for (i = 0; i < nr_pages; i++)
- put_page(pages[i]);
- kvfree(pages);
- iov_iter_advance(&to, copied); /* truncates and discards */
- return res;
-}
-
/*
* Send 'sd->len' bytes to socket from 'sd->file' at position 'sd->pos'
* using sendpage(). Return the number of bytes sent.
@@ -788,33 +705,6 @@ iter_file_splice_write(struct pipe_inode_info *pipe, struct file *out,

EXPORT_SYMBOL(iter_file_splice_write);

-static int write_pipe_buf(struct pipe_inode_info *pipe, struct pipe_buffer *buf,
- struct splice_desc *sd)
-{
- int ret;
- void *data;
- loff_t tmp = sd->pos;
-
- data = kmap(buf->page);
- ret = __kernel_write(sd->u.file, data + buf->offset, sd->len, &tmp);
- kunmap(buf->page);
-
- return ret;
-}
-
-static ssize_t default_file_splice_write(struct pipe_inode_info *pipe,
- struct file *out, loff_t *ppos,
- size_t len, unsigned int flags)
-{
- ssize_t ret;
-
- ret = splice_from_pipe(pipe, out, ppos, len, flags, write_pipe_buf);
- if (ret > 0)
- *ppos += ret;
-
- return ret;
-}
-
/**
* generic_splice_sendpage - splice data from a pipe to a socket
* @pipe: pipe to splice from
@@ -844,7 +734,10 @@ static long do_splice_from(struct pipe_inode_info *pipe, struct file *out,
{
if (out->f_op->splice_write)
return out->f_op->splice_write(pipe, out, ppos, len, flags);
- return default_file_splice_write(pipe, out, ppos, len, flags);
+ pr_warn_ratelimited(
+ "splice write not supported for file %pD4 (pid: %d comm: %.20s)\n",
+ out, current->pid, current->comm);
+ return -EINVAL;
}

/*
@@ -870,7 +763,11 @@ static long do_splice_to(struct file *in, loff_t *ppos,
return in->f_op->splice_read(in, ppos, pipe, len, flags);
if (in->f_op->read_iter)
return generic_file_splice_read(in, ppos, pipe, len, flags);
- return default_file_splice_read(in, ppos, pipe, len, flags);
+
+ pr_warn_ratelimited(
+ "splice read not supported for file %pD4 (pid: %d comm: %.20s)\n",
+ in, current->pid, current->comm);
+ return -EINVAL;
}

/**
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 0c0ec76b600b50..fac6aead402a98 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -1919,8 +1919,6 @@ ssize_t rw_copy_check_uvector(int type, const struct iovec __user * uvector,

extern ssize_t vfs_read(struct file *, char __user *, size_t, loff_t *);
extern ssize_t vfs_write(struct file *, const char __user *, size_t, loff_t *);
-extern ssize_t vfs_readv(struct file *, const struct iovec __user *,
- unsigned long, loff_t *, rwf_t);
extern ssize_t vfs_copy_file_range(struct file *, loff_t , struct file *,
loff_t, size_t, unsigned int);
extern ssize_t generic_copy_file_range(struct file *file_in, loff_t pos_in,
--
2.26.2