Re: [PATCH] drivers/virt: vmgenid: add vm generation id driver

From: gregkh@xxxxxxxxxxxxxxxxxxx
Date: Fri Oct 16 2020 - 11:14:03 EST


On Fri, Oct 16, 2020 at 02:33:15PM +0000, Catangiu, Adrian Costin wrote:
> +config VMGENID
> + tristate "Virtual Machine Generation ID driver"
> + depends on ACPI
> + default M

Unless this is required to boot a machine, this should be removed.

> + help
> + This is a Virtual Machine Generation ID driver which provides
> + a virtual machine unique identifier. The provided UUID can be
> + watched through the FS interface exposed by this driver, and
> + thus can provide notifications for VM snapshot or cloning events.

Where is the uuid exposed in the filesystem?

> + This enables applications and libraries that store or cache
> + sensitive information, to know that they need to regenerate it
> + after process memory has been exposed to potential copying.

Module name?

> +
> config FSL_HV_MANAGER
> tristate "Freescale hypervisor management driver"
> depends on FSL_SOC
> diff --git a/drivers/virt/Makefile b/drivers/virt/Makefile
> index fd33124..a1f8dcc 100644
> --- a/drivers/virt/Makefile
> +++ b/drivers/virt/Makefile
> @@ -4,4 +4,5 @@
> #
>
> obj-$(CONFIG_FSL_HV_MANAGER) += fsl_hypervisor.o
> +obj-$(CONFIG_VMGENID) += vmgenid.o
> obj-y += vboxguest/
> diff --git a/drivers/virt/vmgenid.c b/drivers/virt/vmgenid.c
> new file mode 100644
> index 0000000..d314c72
> --- /dev/null
> +++ b/drivers/virt/vmgenid.c
> @@ -0,0 +1,419 @@
> +// SPDX-License-Identifier: GPL-2.0
> +/*
> + * Virtual Machine Generation ID driver
> + *
> + * Copyright (C) 2018 Red Hat Inc, Copyright (C) 2020 Amazon.com Inc

That's not how you write copyright lines, please fix up.

> + * All rights reserved.
> + * Authors:
> + * Adrian Catangiu <acatan@xxxxxxxxxx>
> + * Or Idgar <oridgar@xxxxxxxxx>
> + * Gal Hammer <ghammer@xxxxxxxxxx>
> + *
> + */
> +#include <linux/acpi.h>
> +#include <linux/cdev.h>
> +#include <linux/kernel.h>
> +#include <linux/mm.h>
> +#include <linux/module.h>
> +#include <linux/poll.h>
> +#include <linux/uuid.h>
> +#include <linux/vmgenid.h>
> +
> +#define DEV_NAME "vmgenid"
> +ACPI_MODULE_NAME(DEV_NAME);
> +
> +struct dev_data {
> + struct cdev cdev;
> + dev_t dev_id;
> + unsigned long map_buf;
> +
> + void *uuid_iomap;
> + uuid_t uuid;
> + wait_queue_head_t read_wait;
> +
> + atomic_t watchers;
> + atomic_t outdated_watchers;
> + wait_queue_head_t outdated_wait;
> +};
> +
> +struct file_data {
> + struct dev_data *dev_data;
> + uuid_t acked_uuid;
> +};
> +
> +static bool vmgenid_uuid_matches(struct dev_data *priv, uuid_t *uuid)
> +{
> + return !memcmp(uuid, &priv->uuid, sizeof(uuid_t));
> +}
> +
> +static void vmgenid_put_outdated_watchers(struct dev_data *priv)
> +{
> + if (atomic_dec_and_test(&priv->outdated_watchers))
> + wake_up_interruptible(&priv->outdated_wait);
> +}
> +
> +static int vmgenid_open(struct inode *inode, struct file *file)
> +{
> + struct dev_data *priv =
> + container_of(inode->i_cdev, struct dev_data, cdev);
> + struct file_data *file_data =
> + kzalloc(sizeof(struct file_data), GFP_KERNEL);
> +
> + if (!file_data)
> + return -ENOMEM;
> +
> + file_data->acked_uuid = priv->uuid;
> + file_data->dev_data = priv;
> +
> + file->private_data = file_data;
> + atomic_inc(&priv->watchers);
> +
> + return 0;
> +}
> +
> +static int vmgenid_close(struct inode *inode, struct file *file)
> +{
> + struct file_data *file_data = (struct file_data *) file->private_data;
> + struct dev_data *priv = file_data->dev_data;
> +
> + if (!vmgenid_uuid_matches(priv, &file_data->acked_uuid))
> + vmgenid_put_outdated_watchers(priv);
> + atomic_dec(&priv->watchers);
> + kfree(file->private_data);
> +
> + return 0;
> +}
> +
> +static ssize_t
> +vmgenid_read(struct file *file, char __user *ubuf, size_t nbytes, loff_t *ppos)
> +{
> + struct file_data *file_data =
> + (struct file_data *) file->private_data;
> + struct dev_data *priv = file_data->dev_data;
> + ssize_t ret;
> +
> + if (nbytes == 0)
> + return 0;
> + /* disallow partial UUID reads */
> + if (nbytes < sizeof(uuid_t))
> + return -EINVAL;
> + nbytes = sizeof(uuid_t);
> +
> + if (vmgenid_uuid_matches(priv, &file_data->acked_uuid)) {
> + if (file->f_flags & O_NONBLOCK)
> + return -EAGAIN;
> + ret = wait_event_interruptible(
> + priv->read_wait,
> + !vmgenid_uuid_matches(priv, &file_data->acked_uuid)
> + );
> + if (ret)
> + return ret;
> + }
> +
> + ret = copy_to_user(ubuf, &priv->uuid, nbytes);
> + if (ret)
> + return -EFAULT;
> +
> + return nbytes;
> +}
> +
> +static ssize_t vmgenid_write(struct file *file, const char __user *ubuf,
> + size_t count, loff_t *ppos)
> +{
> + struct file_data *file_data = (struct file_data *) file->private_data;
> + struct dev_data *priv = file_data->dev_data;
> + uuid_t ack_uuid;
> +
> + /* disallow partial UUID writes */
> + if (count != sizeof(uuid_t))
> + return -EINVAL;
> + if (copy_from_user(&ack_uuid, ubuf, count))
> + return -EFAULT;
> + /* wrong UUID acknowledged */
> + if (!vmgenid_uuid_matches(priv, &ack_uuid))
> + return -EINVAL;
> +
> + if (!vmgenid_uuid_matches(priv, &file_data->acked_uuid)) {
> + /* update local view of UUID */
> + file_data->acked_uuid = ack_uuid;
> + vmgenid_put_outdated_watchers(priv);
> + }
> +
> + return (ssize_t)count;
> +}
> +
> +static __poll_t
> +vmgenid_poll(struct file *file, poll_table *wait)
> +{
> + __poll_t mask = 0;
> + struct file_data *file_data =
> + (struct file_data *) file->private_data;
> + struct dev_data *priv = file_data->dev_data;
> +
> + if (!vmgenid_uuid_matches(priv, &file_data->acked_uuid))
> + return EPOLLIN | EPOLLRDNORM;
> +
> + poll_wait(file, &priv->read_wait, wait);
> +
> + if (!vmgenid_uuid_matches(priv, &file_data->acked_uuid))
> + mask = EPOLLIN | EPOLLRDNORM;
> +
> + return mask;
> +}
> +
> +static long vmgenid_ioctl(struct file *file,
> + unsigned int cmd, unsigned long arg)
> +{
> + struct file_data *file_data =
> + (struct file_data *) file->private_data;
> + struct dev_data *priv = file_data->dev_data;
> + struct timespec __user *timeout = (void *) arg;
> + struct timespec kts;
> + ktime_t until;
> + int ret;
> +
> + switch (cmd) {
> + case VMGENID_GET_OUTDATED_WATCHERS:
> + ret = atomic_read(&priv->outdated_watchers);
> + break;
> + case VMGENID_WAIT_WATCHERS:
> + if (timeout) {
> + ret = copy_from_user(&kts, timeout, sizeof(kts));
> + if (ret)
> + return -EFAULT;
> + until = timespec_to_ktime(kts);

No validation of structure fields?

And you are exposing a kernel structure to userspace?

I don't see this function in Linus's tree right now, are you sure this
builds?


> + } else {
> + until = KTIME_MAX;
> + }
> +
> + ret = wait_event_interruptible_hrtimeout(
> + priv->outdated_wait,
> + !atomic_read(&priv->outdated_watchers),
> + until
> + );
> + break;
> + default:
> + ret = -EINVAL;
> + break;
> + }
> + return ret;
> +}
> +
> +static vm_fault_t vmgenid_vm_fault(struct vm_fault *vmf)
> +{
> + struct page *page;
> + struct file_data *file_data =
> + (struct file_data *) vmf->vma->vm_private_data;

You do know you don't need to cast stuff like this, right?

Also run checkpatch.pl so maintainers are not grumpy and tell you to run
checkpatch.pl.

> + struct dev_data *priv = file_data->dev_data;
> +
> + if (priv->map_buf) {
> + page = virt_to_page(priv->map_buf);
> + get_page(page);
> + vmf->page = page;
> + }
> +
> + return 0;
> +}
> +
> +static const struct vm_operations_struct vmgenid_vm_ops = {
> + .fault = vmgenid_vm_fault,
> +};
> +
> +static int vmgenid_mmap(struct file *file, struct vm_area_struct *vma)
> +{
> + if (vma->vm_pgoff != 0 || vma_pages(vma) > 1)
> + return -EINVAL;
> +
> + if ((vma->vm_flags & VM_WRITE) != 0)
> + return -EPERM;
> +
> + vma->vm_ops = &vmgenid_vm_ops;
> + vma->vm_flags |= VM_DONTEXPAND | VM_DONTDUMP;
> + vma->vm_private_data = file->private_data;
> +
> + return 0;
> +}
> +
> +static const struct file_operations fops = {
> + .owner = THIS_MODULE,
> + .mmap = vmgenid_mmap,
> + .open = vmgenid_open,
> + .release = vmgenid_close,
> + .read = vmgenid_read,
> + .write = vmgenid_write,
> + .poll = vmgenid_poll,
> + .compat_ioctl = vmgenid_ioctl,
> + .unlocked_ioctl = vmgenid_ioctl,
> +};
> +
> +static int vmgenid_acpi_map(struct dev_data *priv, acpi_handle handle)
> +{
> + int i;
> + phys_addr_t phys_addr;
> + struct acpi_buffer buffer = { ACPI_ALLOCATE_BUFFER, NULL };
> + acpi_status status;
> + union acpi_object *pss;
> + union acpi_object *element;
> +
> + status = acpi_evaluate_object(handle, "ADDR", NULL, &buffer);
> + if (ACPI_FAILURE(status)) {
> + ACPI_EXCEPTION((AE_INFO, status, "Evaluating ADDR"));
> + return -ENODEV;
> + }
> + pss = buffer.pointer;
> + if (!pss || pss->type != ACPI_TYPE_PACKAGE || pss->package.count != 2)
> + return -EINVAL;
> +
> + phys_addr = 0;
> + for (i = 0; i < pss->package.count; i++) {
> + element = &(pss->package.elements[i]);
> + if (element->type != ACPI_TYPE_INTEGER)
> + return -EINVAL;
> + phys_addr |= element->integer.value << i * 32;
> + }
> +
> + priv->uuid_iomap = acpi_os_map_memory(phys_addr, sizeof(uuid_t));
> + if (!priv->uuid_iomap) {
> + pr_err("Could not map memory at 0x%llx, size %u",
> + phys_addr,
> + (u32)sizeof(uuid_t));
> + return -ENOMEM;
> + }
> +
> + memcpy_fromio(&priv->uuid, priv->uuid_iomap, sizeof(uuid_t));
> + memcpy((void *) priv->map_buf, &priv->uuid, sizeof(uuid_t));
> +
> + return 0;
> +}
> +
> +static int vmgenid_acpi_add(struct acpi_device *device)
> +{
> + int ret;
> + struct dev_data *priv;
> +
> + if (!device)
> + return -EINVAL;
> +
> + priv = kzalloc(sizeof(struct dev_data), GFP_KERNEL);
> + if (!priv)
> + return -ENOMEM;
> +
> + priv->map_buf = get_zeroed_page(GFP_KERNEL);
> + if (!priv->map_buf) {
> + ret = -ENOMEM;
> + goto free;
> + }
> +
> + device->driver_data = priv;
> +
> + init_waitqueue_head(&priv->read_wait);
> + atomic_set(&priv->watchers, 0);
> + atomic_set(&priv->outdated_watchers, 0);
> + init_waitqueue_head(&priv->outdated_wait);
> +
> + ret = vmgenid_acpi_map(priv, device->handle);
> + if (ret < 0)
> + goto err;
> +
> + ret = alloc_chrdev_region(&priv->dev_id, 0, 1, DEV_NAME);

Why is this not a misc device driver instead of messing around with all
of this "by hand"?

By doing things this way, you are not creating a device node in /dev/
automatically, right? How did you test this?

> @@ -0,0 +1,22 @@
> +/* SPDX-License-Identifier: GPL-2.0+ WITH Linux-syscall-note */
> +/*
> + * Copyright (c) 2020, Amazon.com Inc.
> + *
> + * This program is free software; you can redistribute it and/or
> + * modify it under the terms of the GNU General Public License
> + * as published by the Free Software Foundation; either version
> + * 2 of the License, or (at your option) any later version.

License boilerplate is not needed.

> + */
> +
> +#ifndef _UAPI_LINUX_VMGENID_H
> +#define _UAPI_LINUX_VMGENID_H
> +
> +#include <linux/ioctl.h>
> +#include <linux/time.h>
> +
> +#define VMGENID_IOCTL 0x2d
> +#define VMGENID_GET_OUTDATED_WATCHERS _IO(VMGENID_IOCTL, 1)
> +#define VMGENID_WAIT_WATCHERS _IOW(VMGENID_IOCTL, 2, struct timespec)

Why do you need ioctls for this? Why not just use read/write?

thanks,

greg k-h