Re: [Xen-devel] [PATCH] xen: add new hypercall buffer mapping device

From: Andrew Cooper
Date: Fri Jun 15 2018 - 10:16:07 EST


On 15/06/18 14:17, Juergen Gross wrote:
> +MODULE_LICENSE("GPL");
> +
> +static int limit = 64;
> +module_param(limit, int, 0644);
> +MODULE_PARM_DESC(limit, "Maximum number of pages that may be allocated by "
> + "the privcmd-buf device per open file");

I have a feeling that, once we try and remove some of the bounce
buffering, 64 pages will be somewhat restricting. In particular,
migration performance will benefit by keeping the logdirty bitmap buffer
persistently mapped, rather than allocated/bounced/deallocated on each
iteration.

However, perhaps 64 is fine for now.

> +static int privcmd_buf_mmap(struct file *file, struct vm_area_struct *vma)
> +{
> + struct privcmd_buf_private *file_priv = file->private_data;
> + struct privcmd_buf_vma_private *vma_priv;
> + unsigned int count = vma_pages(vma);

This will truncate to 0 if anyone tried mmap()ing 8T (if I've done my
calculations correctly) of virtual address space.

> + unsigned int i;
> + int ret = 0;
> +
> + if (!(vma->vm_flags & VM_SHARED)) {
> + pr_err("Mapping must be shared\n");
> + return -EINVAL;
> + }
> +
> + if (file_priv->allocated + count > limit) {

Use `count > limit || (allocated + count) > limit` to avoid overflows.

> + pr_err("Mapping limit reached!\n");
> + return -ENOSPC;
> + }
> +
> + vma_priv = kzalloc(sizeof(*vma_priv) + count * sizeof(void *),
> + GFP_KERNEL);
> + if (!vma_priv)
> + return -ENOMEM;
> +
> + vma_priv->n_pages = count;
> + count = 0;
> + for (i = 0; i < vma_priv->n_pages; i++) {
> + vma_priv->pages[i] = alloc_page(GFP_KERNEL | __GFP_ZERO);
> + if (!vma_priv->pages[i])
> + break;
> + count++;
> + }
> +
> + mutex_lock(&file_priv->lock);
> +
> + file_priv->allocated += count;
> +
> + vma_priv->file_priv = file_priv;
> + vma_priv->users = 1;
> +
> + vma->vm_flags |= VM_IO | VM_DONTEXPAND | VM_DONTDUMP;

Why DONTDUMP? It's just data, and stands a reasonable chance of being
related to the cause of a crash.

> + vma->vm_ops = &privcmd_buf_vm_ops;
> + vma->vm_private_data = vma_priv;
> +
> + list_add(&vma_priv->list, &file_priv->list);
> +
> + if (vma_priv->n_pages != count)
> + ret = -ENOMEM;
> + else
> + for (i = 0; i < vma_priv->n_pages; i++) {
> + ret = vm_insert_page(vma, vma->vm_start + i * PAGE_SIZE,
> + vma_priv->pages[i]);
> + if (ret)
> + break;
> + }
> +
> + if (ret)
> + privcmd_buf_vmapriv_free(vma_priv);
> +
> + mutex_unlock(&file_priv->lock);
> +
> + return ret;
> +}
> +
> +const struct file_operations xen_privcmdbuf_fops = {
> + .owner = THIS_MODULE,
> + .open = privcmd_buf_open,
> + .release = privcmd_buf_release,
> + .mmap = privcmd_buf_mmap,
> +};
> +EXPORT_SYMBOL_GPL(xen_privcmdbuf_fops);
> +
> +struct miscdevice xen_privcmdbuf_dev = {
> + .minor = MISC_DYNAMIC_MINOR,
> + .name = "xen/privcmd-buf",

Sorry to nitpick, but how about naming this just "xen/hypercall" ?

privcmd is currently a rather large security hole because it allows
userspace to have access to all the hypercalls, including the ones which
should be restricted to just the kernel. In the past, a plan has been
floated to slowly replace the use of the raw ioctl() with proper ioctls
for the hypercalls which userspace might reasonably use.
> diff --git a/drivers/xen/xenfs/super.c b/drivers/xen/xenfs/super.c
> index 71ddfb4cf61c..d752d0dd3d1d 100644
> --- a/drivers/xen/xenfs/super.c
> +++ b/drivers/xen/xenfs/super.c
> @@ -48,6 +48,7 @@ static int xenfs_fill_super(struct super_block *sb, void *data, int silent)
> [2] = { "xenbus", &xen_xenbus_fops, S_IRUSR|S_IWUSR },
> { "capabilities", &capabilities_file_ops, S_IRUGO },
> { "privcmd", &xen_privcmd_fops, S_IRUSR|S_IWUSR },
> + { "privcmd-buf", &xen_privcmdbuf_fops, S_IRUSR|S_IWUSR },

Do we really need to provide the fallback here? /dev/xen has been
around for ages, and it would really be a good thing if we can
eventually retire xenfs.

~Andrew