Re: [PATCH] fs/proc/kcore.c: Omit kernel text area for hardened usercopy feature

From: Jiri Olsa
Date: Wed Sep 07 2016 - 17:25:00 EST


On Wed, Sep 07, 2016 at 09:25:59PM +0200, Jiri Olsa wrote:
> On Wed, Sep 07, 2016 at 09:58:01AM -0700, Linus Torvalds wrote:
> > On Wed, Sep 7, 2016 at 9:38 AM, Andi Kleen <andi@xxxxxxxxxxxxxx> wrote:
> > >>
> > >> - n = copy_to_user(buffer, (char *)start, tsz);
> > >> + buf = kzalloc(tsz, GFP_KERNEL);
> > >
> > > You have to add some limit and a loop, otherwise a user can eat all kernel memory,
> > > or copies > KMALLOC_MAX wouldn't work. Probably only get a single page.
> >
> > 'start' and 'tsz' is already chunked to be aligned pages (well, as
> > aligned as they can be: the beginning and end obviously won't be).
> > Above the loop:
> >
> > if ((tsz = (PAGE_SIZE - (start & ~PAGE_MASK))) > buflen)
> > tsz = buflen;
> >
> > and then inside the loop:
> >
> > tsz = (buflen > PAGE_SIZE ? PAGE_SIZE : buflen);
> >
> > so it's already limited to one page.
> >
> > That said, it *might* be worth moving the temporary allocation to the
> > top, or even to move it to open_kcore(). It used to be a special case
> > for just the vmalloc region, now it's always done.
> >
> > So instead of having two different copies of the same special case for
> > the two different cases, why not try to unify them and just have one
> > common (page-sized) buffer allocation?

I'll give this more testing, but it looks ok so far,
also maybe it should be split into 2 parts:
- adding the common buffer
- ktext using bounce buffer

jirka


diff --git a/fs/proc/kcore.c b/fs/proc/kcore.c
index a939f5ed7f89..7405b3894183 100644
--- a/fs/proc/kcore.c
+++ b/fs/proc/kcore.c
@@ -430,6 +430,7 @@ static void elf_kcore_store_hdr(char *bufp, int nphdr, int dataoff)
static ssize_t
read_kcore(struct file *file, char __user *buffer, size_t buflen, loff_t *fpos)
{
+ char *buf = file->private_data;
ssize_t acc = 0;
size_t size, tsz;
size_t elf_buflen;
@@ -500,23 +501,20 @@ read_kcore(struct file *file, char __user *buffer, size_t buflen, loff_t *fpos)
if (clear_user(buffer, tsz))
return -EFAULT;
} else if (is_vmalloc_or_module_addr((void *)start)) {
- char * elf_buf;
-
- elf_buf = kzalloc(tsz, GFP_KERNEL);
- if (!elf_buf)
- return -ENOMEM;
- vread(elf_buf, (char *)start, tsz);
+ vread(buf, (char *)start, tsz);
/* we have to zero-fill user buffer even if no read */
- if (copy_to_user(buffer, elf_buf, tsz)) {
- kfree(elf_buf);
+ if (copy_to_user(buffer, buf, tsz))
return -EFAULT;
- }
- kfree(elf_buf);
} else {
if (kern_addr_valid(start)) {
unsigned long n;

- n = copy_to_user(buffer, (char *)start, tsz);
+ /*
+ * Using bounce buffer to bypass the
+ *hardened user copy kernel text checks.
+ */
+ memcpy(buf, (char *) start, tsz);
+ n = copy_to_user(buffer, buf, tsz);
/*
* We cannot distinguish between fault on source
* and fault on destination. When this happens
@@ -549,6 +547,11 @@ static int open_kcore(struct inode *inode, struct file *filp)
{
if (!capable(CAP_SYS_RAWIO))
return -EPERM;
+
+ filp->private_data = (void *) __get_free_pages(GFP_KERNEL, 0);
+ if (!filp->private_data)
+ return -ENOMEM;
+
if (kcore_need_update)
kcore_update_ram();
if (i_size_read(inode) != proc_root_kcore->size) {
@@ -556,13 +559,20 @@ static int open_kcore(struct inode *inode, struct file *filp)
i_size_write(inode, proc_root_kcore->size);
inode_unlock(inode);
}
+
return 0;
}

+static int release_kcore(struct inode *inode, struct file *file)
+{
+ free_pages((unsigned long) file->private_data, 0);
+ return 0;
+}

static const struct file_operations proc_kcore_operations = {
.read = read_kcore,
.open = open_kcore,
+ .release = release_kcore,
.llseek = default_llseek,
};