Re: [PATCH 1/2] KVM: x86: introduce definitions to support static calls for kvm_x86_ops

From: Sean Christopherson
Date: Tue Jan 12 2021 - 18:05:18 EST


On Mon, Jan 11, 2021, Jason Baron wrote:
> Use static calls to improve kvm_x86_ops performance. Introduce the
> definitions that will be used by a subsequent patch to actualize the
> savings.
>
> Note that all kvm_x86_ops are covered here except for 'pmu_ops' and
> 'nested ops'. I think they can be covered by static calls in a similar
> manner, but were omitted from this series to reduce scope and because
> I don't think they have as large of a performance impact.
>
> Cc: Paolo Bonzini <pbonzini@xxxxxxxxxx>
> Cc: Thomas Gleixner <tglx@xxxxxxxxxxxxx>
> Cc: Ingo Molnar <mingo@xxxxxxxxxx>
> Cc: Borislav Petkov <bp@xxxxxxxxx>
> Cc: Peter Zijlstra <peterz@xxxxxxxxxxxxx>
> Cc: Andrea Arcangeli <aarcange@xxxxxxxxxx>
> Signed-off-by: Jason Baron <jbaron@xxxxxxxxxx>
> ---
> arch/x86/include/asm/kvm_host.h | 65 +++++++++++++++++++++++++++++++++++++++++
> arch/x86/kvm/x86.c | 5 ++++
> 2 files changed, 70 insertions(+)
>
> diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
> index 3ab7b46..e947522 100644
> --- a/arch/x86/include/asm/kvm_host.h
> +++ b/arch/x86/include/asm/kvm_host.h
> @@ -1087,6 +1087,65 @@ static inline u16 kvm_lapic_irq_dest_mode(bool dest_mode_logical)
> return dest_mode_logical ? APIC_DEST_LOGICAL : APIC_DEST_PHYSICAL;
> }
>
> +/*
> + * static calls cover all kvm_x86_ops except for functions under pmu_ops and
> + * nested_ops.
> + */
> +#define FOREACH_KVM_X86_OPS(F) \
> + F(hardware_enable); F(hardware_disable); F(hardware_unsetup); \
> + F(cpu_has_accelerated_tpr); F(has_emulated_msr); \
> + F(vcpu_after_set_cpuid); F(vm_init); F(vm_destroy); F(vcpu_create); \
> + F(vcpu_free); F(vcpu_reset); F(prepare_guest_switch); F(vcpu_load); \
> + F(vcpu_put); F(update_exception_bitmap); F(get_msr); F(set_msr); \
> + F(get_segment_base); F(get_segment); F(get_cpl); F(set_segment); \
> + F(get_cs_db_l_bits); F(set_cr0); F(is_valid_cr4); F(set_cr4); \
> + F(set_efer); F(get_idt); F(set_idt); F(get_gdt); F(set_gdt); \
> + F(sync_dirty_debug_regs); F(set_dr7); F(cache_reg); F(get_rflags); \
> + F(set_rflags); F(tlb_flush_all); F(tlb_flush_current); \
> + F(tlb_remote_flush); F(tlb_remote_flush_with_range); F(tlb_flush_gva); \
> + F(tlb_flush_guest); F(run); F(handle_exit); \
> + F(skip_emulated_instruction); F(update_emulated_instruction); \
> + F(set_interrupt_shadow); F(get_interrupt_shadow); F(patch_hypercall); \
> + F(set_irq); F(set_nmi); F(queue_exception); F(cancel_injection); \
> + F(interrupt_allowed); F(nmi_allowed); F(get_nmi_mask); F(set_nmi_mask);\
> + F(enable_nmi_window); F(enable_irq_window); F(update_cr8_intercept); \
> + F(check_apicv_inhibit_reasons); F(pre_update_apicv_exec_ctrl); \
> + F(refresh_apicv_exec_ctrl); F(hwapic_irr_update); F(hwapic_isr_update);\
> + F(guest_apic_has_interrupt); F(load_eoi_exitmap); \
> + F(set_virtual_apic_mode); F(set_apic_access_page_addr); \
> + F(deliver_posted_interrupt); F(sync_pir_to_irr); F(set_tss_addr); \
> + F(set_identity_map_addr); F(get_mt_mask); F(load_mmu_pgd); \
> + F(has_wbinvd_exit); F(write_l1_tsc_offset); F(get_exit_info); \
> + F(check_intercept); F(handle_exit_irqoff); F(request_immediate_exit); \
> + F(sched_in); F(slot_enable_log_dirty); F(slot_disable_log_dirty); \
> + F(flush_log_dirty); F(enable_log_dirty_pt_masked); \
> + F(cpu_dirty_log_size); F(pre_block); F(post_block); F(vcpu_blocking); \
> + F(vcpu_unblocking); F(update_pi_irte); F(apicv_post_state_restore); \
> + F(dy_apicv_has_pending_interrupt); F(set_hv_timer); F(cancel_hv_timer);\
> + F(setup_mce); F(smi_allowed); F(pre_enter_smm); F(pre_leave_smm); \
> + F(enable_smi_window); F(mem_enc_op); F(mem_enc_reg_region); \
> + F(mem_enc_unreg_region); F(get_msr_feature); \
> + F(can_emulate_instruction); F(apic_init_signal_blocked); \
> + F(enable_direct_tlbflush); F(migrate_timers); F(msr_filter_changed); \
> + F(complete_emulated_msr)

What about adding a dedicated .h file for this beast? Then it won't be so
painful to do one function per line. As is, updates to kvm_x86_ops will be
messy.

And add yet another macro layer (or maybe just tweak this one?) so that the
caller controls the line ending? I suppose you could also just use a comma, but
that's a bit dirty...

That would also allow using this to declare vmx_x86_ops and svm_x86_ops, which
would need a comma instead of a semi-colon. There have been a few attempts to
add a bit of automation to {vmx,svm}_x86_ops, this seems like it would be good
motivation to go in a different direction and declare/define all ops, e.g. the
VMX/SVM code could simply do something like:

#define DECLARE_VMX_X86_OP(func) \
.func = vmx_##func

static struct kvm_x86_ops vmx_x86_ops __initdata = {
.vm_size = sizeof(struct kvm_vmx),
.vm_init = vmx_vm_init,

.pmu_ops = &intel_pmu_ops,
.nested_ops = &vmx_nested_ops,

FOREACH_KVM_X86_OPS(DECLARE_VMX_X86_OP)
};