Re: [PATCH v2 15/15] Drivers: hv: Add modules to expose /dev/mshv to VMMs running on Hyper-V

From: Nuno Das Neves
Date: Fri Aug 18 2023 - 15:00:31 EST


On 8/18/2023 6:08 AM, Saurabh Singh Sengar wrote:
>> +
>> +config MSHV_VTL
>> + tristate "Microsoft Hyper-V VTL driver"
>> + depends on MSHV
>> + select HYPERV_VTL_MODE
>> + select TRANSPARENT_HUGEPAGE
>
> TRANSPARENT_HUGEPAGE can be avoided for now.
>

I will remove it in the next version. Thanks.
>> +
>> +#define HV_GET_REGISTER_BATCH_SIZE \
>> + (HV_HYP_PAGE_SIZE / sizeof(union hv_register_value))
>> +#define HV_SET_REGISTER_BATCH_SIZE \
>> + ((HV_HYP_PAGE_SIZE - sizeof(struct hv_input_set_vp_registers)) \
>> + / sizeof(struct hv_register_assoc))
>> +
>> +int hv_call_get_vp_registers(
>> + u32 vp_index,
>> + u64 partition_id,
>> + u16 count,
>> + union hv_input_vtl input_vtl,
>> + struct hv_register_assoc *registers)
>> +{
>> + struct hv_input_get_vp_registers *input_page;
>> + union hv_register_value *output_page;
>> + u16 completed = 0;
>> + unsigned long remaining = count;
>> + int rep_count, i;
>> + u64 status;
>> + unsigned long flags;
>> +
>> + local_irq_save(flags);
>> +
>> + input_page = *this_cpu_ptr(hyperv_pcpu_input_arg);
>> + output_page = *this_cpu_ptr(hyperv_pcpu_output_arg);
>> +
>> + input_page->partition_id = partition_id;
>> + input_page->vp_index = vp_index;
>> + input_page->input_vtl.as_uint8 = input_vtl.as_uint8;
>> + input_page->rsvd_z8 = 0;
>> + input_page->rsvd_z16 = 0;
>> +
>> + while (remaining) {
>> + rep_count = min(remaining, HV_GET_REGISTER_BATCH_SIZE);
>> + for (i = 0; i < rep_count; ++i)
>> + input_page->names[i] = registers[i].name;
>> +
>> + status = hv_do_rep_hypercall(HVCALL_GET_VP_REGISTERS,
>> rep_count,
>> + 0, input_page, output_page);
>
> Is there any possibility that count value is passed 0 by mistake ? In that case
> status will remain uninitialized.
>

These lines ensure rep_count is never 0 here:

while (remaining) {
rep_count = min(remaining, HV_GET_REGISTER_BATCH_SIZE);

Remaining can't be 0 or the loop would exit, and HV_GET_REGISTER_BATCH_SIZE
is not 0, or we would never get any registers.

>> diff --git a/drivers/hv/hv_common.c b/drivers/hv/hv_common.c
>> index 13f972e72375..ccd76f30a638 100644
>> --- a/drivers/hv/hv_common.c
>> +++ b/drivers/hv/hv_common.c
>> @@ -62,7 +62,11 @@ EXPORT_SYMBOL_GPL(hyperv_pcpu_output_arg);
>> */
>> static inline bool hv_output_arg_exists(void)
>> {
>> +#ifdef CONFIG_MSHV_VTL
>
> Although today both the option works together. But thinking
> which is more accurate CONFIG_HYPERV_VTL_MODE or
> CONFIG_MSHV_VTL here for scalability of VTL modules.
>

Good point. Though I'm not sure it matters too much right now,
since as you mention they will always be enabled together.

Does CONFIG_HYPERV_VTL_MODE use the output arg?

>> diff --git a/drivers/hv/mshv.h b/drivers/hv/mshv.h
>> new file mode 100644
>> index 000000000000..166480a73f3f
>> --- /dev/null
>> +++ b/drivers/hv/mshv.h
>> @@ -0,0 +1,156 @@
>> +/* SPDX-License-Identifier: GPL-2.0-only */
>> +/*
>> + * Copyright (c) 2023, Microsoft Corporation.
>> + */
>> +
>> +#ifndef _MSHV_H_
>> +#define _MSHV_H_
>> +
>> +#include <linux/spinlock.h>
>> +#include <linux/mutex.h>
>> +#include <linux/semaphore.h>
>> +#include <linux/sched.h>
>> +#include <linux/srcu.h>
>> +#include <linux/wait.h>
>> +#include <uapi/linux/mshv.h>
>> +
>> +/*
>> + * Hyper-V hypercalls
>> + */
>> +
>> +int hv_call_withdraw_memory(u64 count, int node, u64 partition_id);
>> +int hv_call_create_partition(
>> + u64 flags,
>> + struct hv_partition_creation_properties creation_properties,
>> + union hv_partition_isolation_properties isolation_properties,
>> + u64 *partition_id);
>> +int hv_call_initialize_partition(u64 partition_id);
>> +int hv_call_finalize_partition(u64 partition_id);
>> +int hv_call_delete_partition(u64 partition_id);
>> +int hv_call_map_gpa_pages(
>> + u64 partition_id,
>> + u64 gpa_target,
>> + u64 page_count, u32 flags,
>> + struct page **pages);
>> +int hv_call_unmap_gpa_pages(
>> + u64 partition_id,
>> + u64 gpa_target,
>> + u64 page_count, u32 flags);
>> +int hv_call_get_vp_registers(
>> + u32 vp_index,
>> + u64 partition_id,
>> + u16 count,
>> + union hv_input_vtl input_vtl,
>> + struct hv_register_assoc *registers);
>> +int hv_call_get_gpa_access_states(
>> + u64 partition_id,
>> + u32 count,
>> + u64 gpa_base_pfn,
>> + u64 state_flags,
>> + int *written_total,
>> + union hv_gpa_page_access_state *states);
>> +
>> +int hv_call_set_vp_registers(
>> + u32 vp_index,
>> + u64 partition_id,
>> + u16 count,
>> + union hv_input_vtl input_vtl,
>> + struct hv_register_assoc *registers);
>
> Nit: Opportunity to fix many of the checkpatch.pl related to line break here
> and many other places.
>

checkpatch.pl doesn't complain about anything in this file.

>> +static int
>> +mshv_assign_ioeventfd(struct mshv_partition *partition,
>> + struct mshv_ioeventfd *args)
>> + __must_hold(&partition->mutex)
>> +{
>> + struct kernel_mshv_ioeventfd *p;
>> + struct eventfd_ctx *eventfd;
>> + u64 doorbell_flags = 0;
>> + int ret;
>> +
>> + /* This mutex is currently protecting ioeventfd.items list */
>> + WARN_ON_ONCE(!mutex_is_locked(&partition->mutex));
>> +
>> + if (args->flags & MSHV_IOEVENTFD_FLAG_PIO)
>> + return -EOPNOTSUPP;
>> +
>> + /* must be natural-word sized */
>> + switch (args->len) {
>> + case 0:
>> + doorbell_flags = HV_DOORBELL_FLAG_TRIGGER_SIZE_ANY;
>> + break;
>> + case 1:
>> + doorbell_flags = HV_DOORBELL_FLAG_TRIGGER_SIZE_BYTE;
>> + break;
>> + case 2:
>> + doorbell_flags = HV_DOORBELL_FLAG_TRIGGER_SIZE_WORD;
>> + break;
>> + case 4:
>> + doorbell_flags =
>> HV_DOORBELL_FLAG_TRIGGER_SIZE_DWORD;
>> + break;
>> + case 8:
>> + doorbell_flags =
>> HV_DOORBELL_FLAG_TRIGGER_SIZE_QWORD;
>> + break;
>> + default:
>> + pr_warn("ioeventfd: invalid length specified\n");
>> + return -EINVAL;
>> + }
>> +
>> + /* check for range overflow */
>> + if (args->addr + args->len < args->addr)
>> + return -EINVAL;
>> +
>> + /* check for extra flags that we don't understand */
>> + if (args->flags & ~MSHV_IOEVENTFD_VALID_FLAG_MASK)
>> + return -EINVAL;
>> +
>> + eventfd = eventfd_ctx_fdget(args->fd);
>> + if (IS_ERR(eventfd))
>> + return PTR_ERR(eventfd);
>> +
>> + p = kzalloc(sizeof(*p), GFP_KERNEL);
>> + if (!p) {
>> + ret = -ENOMEM;
>> + goto fail;
>> + }
>> +
>> + p->addr = args->addr;
>> + p->length = args->len;
>> + p->eventfd = eventfd;
>> +
>> + /* The datamatch feature is optional, otherwise this is a wildcard */
>> + if (args->flags & MSHV_IOEVENTFD_FLAG_DATAMATCH)
>> + p->datamatch = args->datamatch;
>> + else {
>> + p->wildcard = true;
>> + doorbell_flags |=
>> HV_DOORBELL_FLAG_TRIGGER_ANY_VALUE;
>> + }
>> +
>> + if (ioeventfd_check_collision(partition, p)) {
>> + ret = -EEXIST;
>> + goto unlock_fail;
>> + }
>> +
>> + ret = mshv_register_doorbell(partition->id, ioeventfd_mmio_write,
>> + (void *)partition, p->addr,
>> + p->datamatch, doorbell_flags);
>> + if (ret < 0) {
>> + pr_err("Failed to register ioeventfd doorbell!\n");
>
> Nit: Do we like to print function name at the start of pr_err.
>

Yes, we should. I will fix it. Thanks!