Re: [PATCH v11 08/13] KVM: X86: Introduce KVM_HC_PAGE_ENC_STATUS hypercall

From: Sean Christopherson
Date: Tue Apr 06 2021 - 11:48:30 EST


On Mon, Apr 05, 2021, Ashish Kalra wrote:
> From: Ashish Kalra <ashish.kalra@xxxxxxx>

...

> diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
> index 3768819693e5..78284ebbbee7 100644
> --- a/arch/x86/include/asm/kvm_host.h
> +++ b/arch/x86/include/asm/kvm_host.h
> @@ -1352,6 +1352,8 @@ struct kvm_x86_ops {
> int (*complete_emulated_msr)(struct kvm_vcpu *vcpu, int err);
>
> void (*vcpu_deliver_sipi_vector)(struct kvm_vcpu *vcpu, u8 vector);
> + int (*page_enc_status_hc)(struct kvm_vcpu *vcpu, unsigned long gpa,
> + unsigned long sz, unsigned long mode);
> };
>
> struct kvm_x86_nested_ops {
> diff --git a/arch/x86/kvm/svm/sev.c b/arch/x86/kvm/svm/sev.c
> index c9795a22e502..fb3a315e5827 100644
> --- a/arch/x86/kvm/svm/sev.c
> +++ b/arch/x86/kvm/svm/sev.c
> @@ -1544,6 +1544,67 @@ static int sev_receive_finish(struct kvm *kvm, struct kvm_sev_cmd *argp)
> return ret;
> }
>
> +static int sev_complete_userspace_page_enc_status_hc(struct kvm_vcpu *vcpu)
> +{
> + vcpu->run->exit_reason = 0;
> + kvm_rax_write(vcpu, vcpu->run->dma_sharing.ret);
> + ++vcpu->stat.hypercalls;
> + return kvm_skip_emulated_instruction(vcpu);
> +}
> +
> +int svm_page_enc_status_hc(struct kvm_vcpu *vcpu, unsigned long gpa,
> + unsigned long npages, unsigned long enc)
> +{
> + kvm_pfn_t pfn_start, pfn_end;
> + struct kvm *kvm = vcpu->kvm;
> + gfn_t gfn_start, gfn_end;
> +
> + if (!sev_guest(kvm))
> + return -EINVAL;
> +
> + if (!npages)
> + return 0;

Part of me thinks passing a zero size should be an error, not a nop. Either way
works; it just feels a bit weird to allow this to be a nop.

> +
> + gfn_start = gpa_to_gfn(gpa);

This should check that @gpa is aligned.

> + gfn_end = gfn_start + npages;
> +
> + /* out of bound access error check */
> + if (gfn_end <= gfn_start)
> + return -EINVAL;
> +
> + /* lets make sure that gpa exist in our memslot */
> + pfn_start = gfn_to_pfn(kvm, gfn_start);
> + pfn_end = gfn_to_pfn(kvm, gfn_end);
> +
> + if (is_error_noslot_pfn(pfn_start) && !is_noslot_pfn(pfn_start)) {
> + /*
> + * Allow guest MMIO range(s) to be added
> + * to the shared pages list.
> + */
> + return -EINVAL;
> + }
> +
> + if (is_error_noslot_pfn(pfn_end) && !is_noslot_pfn(pfn_end)) {
> + /*
> + * Allow guest MMIO range(s) to be added
> + * to the shared pages list.
> + */
> + return -EINVAL;
> + }

I don't think KVM should do any checks beyond gfn_end <= gfn_start. Just punt
to userspace and give userspace full say over what is/isn't legal.

> +
> + if (enc)
> + vcpu->run->exit_reason = KVM_EXIT_DMA_UNSHARE;
> + else
> + vcpu->run->exit_reason = KVM_EXIT_DMA_SHARE;

Use a single exit and pass "enc" via kvm_run. I also strongly dislike "DMA",
there's no guarantee the guest is sharing memory for DMA.

I think we can usurp KVM_EXIT_HYPERCALL for this? E.g.

vcpu->run->exit_reason = KVM_EXIT_HYPERCALL;
vcpu->run->hypercall.nr = KVM_HC_PAGE_ENC_STATUS;
vcpu->run->hypercall.args[0] = gfn_start << PAGE_SHIFT;
vcpu->run->hypercall.args[1] = npages * PAGE_SIZE;
vcpu->run->hypercall.args[2] = enc;
vcpu->run->hypercall.longmode = is_64_bit_mode(vcpu);

> +
> + vcpu->run->dma_sharing.addr = gfn_start;

Addresses and pfns are not the same thing. If you're passing the size in bytes,
then it's probably best to pass the gpa, not the gfn. Same for the params from
the guest, they should be in the same "domain".

> + vcpu->run->dma_sharing.len = npages * PAGE_SIZE;
> + vcpu->arch.complete_userspace_io =
> + sev_complete_userspace_page_enc_status_hc;

I vote to drop the "userspace" part, it's already quite verbose.

vcpu->arch.complete_userspace_io = sev_complete_page_enc_status_hc;

> +
> + return 0;
> +}
> +

..

> diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
> index f7d12fca397b..ef5c77d59651 100644
> --- a/arch/x86/kvm/x86.c
> +++ b/arch/x86/kvm/x86.c
> @@ -8273,6 +8273,18 @@ int kvm_emulate_hypercall(struct kvm_vcpu *vcpu)
> kvm_sched_yield(vcpu->kvm, a0);
> ret = 0;
> break;
> + case KVM_HC_PAGE_ENC_STATUS: {
> + int r;
> +
> + ret = -KVM_ENOSYS;
> + if (kvm_x86_ops.page_enc_status_hc) {
> + r = kvm_x86_ops.page_enc_status_hc(vcpu, a0, a1, a2);

Use static_call().

> + if (r >= 0)
> + return r;
> + ret = r;
> + }

Hmm, an alternative to adding a kvm_x86_ops hook would be to tag the VM as
supporting/allowing the hypercall. That would clean up this code, ensure VMX
and SVM don't end up creating a different userspace ABI, and make it easier to
reuse the hypercall in the future (I'm still hopeful :-) ). E.g.

case KVM_HC_PAGE_ENC_STATUS: {
u64 gpa = a0, nr_bytes = a1;

if (!vcpu->kvm->arch.page_enc_hc_enable)
break;

if (!PAGE_ALIGNED(gpa) || !PAGE_ALIGNED(nr_bytes) ||
!nr_bytes || gpa + nr_bytes <= gpa) {
ret = -EINVAL;
break;
}

vcpu->run->exit_reason = KVM_EXIT_HYPERCALL;
vcpu->run->hypercall.nr = KVM_HC_PAGE_ENC_STATUS;
vcpu->run->hypercall.args[0] = gpa;
vcpu->run->hypercall.args[1] = nr_bytes;
vcpu->run->hypercall.args[2] = enc;
vcpu->run->hypercall.longmode = op_64_bit;
vcpu->arch.complete_userspace_io = complete_page_enc_hc;
return 0;
}


> + break;
> + }
> default:
> ret = -KVM_ENOSYS;
> break;