Re: [PATCH v21 02/29] LoongArch: KVM: Implement kvm module related interface

From: Huacai Chen
Date: Mon Sep 18 2023 - 22:41:54 EST


On Tue, Sep 19, 2023 at 10:38 AM zhaotianrui <zhaotianrui@xxxxxxxxxxx> wrote:
>
>
> 在 2023/9/17 下午12:21, Huacai Chen 写道:
> > Hi, Tianrui,
> >
> > On Fri, Sep 15, 2023 at 9:50 AM Tianrui Zhao <zhaotianrui@xxxxxxxxxxx> wrote:
> >> Implement LoongArch kvm module init, module exit interface,
> >> using kvm context to save the vpid info and vcpu world switch
> >> interface pointer.
> >>
> >> Reviewed-by: Bibo Mao <maobibo@xxxxxxxxxxx>
> >> Signed-off-by: Tianrui Zhao <zhaotianrui@xxxxxxxxxxx>
> >> ---
> >> arch/loongarch/kvm/main.c | 367 ++++++++++++++++++++++++++++++++++++++
> >> 1 file changed, 367 insertions(+)
> >> create mode 100644 arch/loongarch/kvm/main.c
> >>
> >> diff --git a/arch/loongarch/kvm/main.c b/arch/loongarch/kvm/main.c
> >> new file mode 100644
> >> index 0000000000..0deb9273d8
> >> --- /dev/null
> >> +++ b/arch/loongarch/kvm/main.c
> >> @@ -0,0 +1,367 @@
> >> +// SPDX-License-Identifier: GPL-2.0
> >> +/*
> >> + * Copyright (C) 2020-2023 Loongson Technology Corporation Limited
> >> + */
> >> +
> >> +#include <linux/err.h>
> >> +#include <linux/module.h>
> >> +#include <linux/kvm_host.h>
> >> +#include <asm/cacheflush.h>
> >> +#include <asm/cpufeature.h>
> >> +#include <asm/kvm_csr.h>
> >> +#include "trace.h"
> >> +
> >> +static struct kvm_context __percpu *vmcs;
> >> +struct kvm_world_switch *kvm_loongarch_ops;
> >> +unsigned long vpid_mask;
> >> +static int gcsr_flag[CSR_MAX_NUMS];
> >> +
> >> +int get_gcsr_flag(int csr)
> >> +{
> >> + if (csr < CSR_MAX_NUMS)
> >> + return gcsr_flag[csr];
> >> +
> >> + return INVALID_GCSR;
> >> +}
> >> +
> >> +static inline void set_gcsr_sw_flag(int csr)
> >> +{
> >> + if (csr < CSR_MAX_NUMS)
> >> + gcsr_flag[csr] |= SW_GCSR;
> >> +}
> >> +
> >> +static inline void set_gcsr_hw_flag(int csr)
> >> +{
> >> + if (csr < CSR_MAX_NUMS)
> >> + gcsr_flag[csr] |= HW_GCSR;
> >> +}
> >> +
> >> +/*
> >> + * The default value of gcsr_flag[CSR] is 0, and we use this
> >> + * function to set the flag to 1(SW_GCSR) or 2(HW_GCSR) if the
> >> + * gcsr is software or hardware. It will be used by get/set_gcsr,
> >> + * if gcsr_flag is HW we should use gcsrrd/gcsrwr to access it,
> >> + * else use sw csr to emulate it.
> >> + */
> >> +static void kvm_init_gcsr_flag(void)
> >> +{
> >> + set_gcsr_hw_flag(LOONGARCH_CSR_CRMD);
> >> + set_gcsr_hw_flag(LOONGARCH_CSR_PRMD);
> >> + set_gcsr_hw_flag(LOONGARCH_CSR_EUEN);
> >> + set_gcsr_hw_flag(LOONGARCH_CSR_MISC);
> >> + set_gcsr_hw_flag(LOONGARCH_CSR_ECFG);
> >> + set_gcsr_hw_flag(LOONGARCH_CSR_ESTAT);
> >> + set_gcsr_hw_flag(LOONGARCH_CSR_ERA);
> >> + set_gcsr_hw_flag(LOONGARCH_CSR_BADV);
> >> + set_gcsr_hw_flag(LOONGARCH_CSR_BADI);
> >> + set_gcsr_hw_flag(LOONGARCH_CSR_EENTRY);
> >> + set_gcsr_hw_flag(LOONGARCH_CSR_TLBIDX);
> >> + set_gcsr_hw_flag(LOONGARCH_CSR_TLBEHI);
> >> + set_gcsr_hw_flag(LOONGARCH_CSR_TLBELO0);
> >> + set_gcsr_hw_flag(LOONGARCH_CSR_TLBELO1);
> >> + set_gcsr_hw_flag(LOONGARCH_CSR_ASID);
> >> + set_gcsr_hw_flag(LOONGARCH_CSR_PGDL);
> >> + set_gcsr_hw_flag(LOONGARCH_CSR_PGDH);
> >> + set_gcsr_hw_flag(LOONGARCH_CSR_PWCTL0);
> >> + set_gcsr_hw_flag(LOONGARCH_CSR_PWCTL1);
> >> + set_gcsr_hw_flag(LOONGARCH_CSR_STLBPGSIZE);
> >> + set_gcsr_hw_flag(LOONGARCH_CSR_RVACFG);
> >> + set_gcsr_hw_flag(LOONGARCH_CSR_CPUID);
> >> + set_gcsr_hw_flag(LOONGARCH_CSR_PRCFG1);
> >> + set_gcsr_hw_flag(LOONGARCH_CSR_PRCFG2);
> >> + set_gcsr_hw_flag(LOONGARCH_CSR_PRCFG3);
> >> + set_gcsr_hw_flag(LOONGARCH_CSR_KS0);
> >> + set_gcsr_hw_flag(LOONGARCH_CSR_KS1);
> >> + set_gcsr_hw_flag(LOONGARCH_CSR_KS2);
> >> + set_gcsr_hw_flag(LOONGARCH_CSR_KS3);
> >> + set_gcsr_hw_flag(LOONGARCH_CSR_KS4);
> >> + set_gcsr_hw_flag(LOONGARCH_CSR_KS5);
> >> + set_gcsr_hw_flag(LOONGARCH_CSR_KS6);
> >> + set_gcsr_hw_flag(LOONGARCH_CSR_KS7);
> >> + set_gcsr_hw_flag(LOONGARCH_CSR_TMID);
> >> + set_gcsr_hw_flag(LOONGARCH_CSR_TCFG);
> >> + set_gcsr_hw_flag(LOONGARCH_CSR_TVAL);
> >> + set_gcsr_hw_flag(LOONGARCH_CSR_CNTC);
> >> + set_gcsr_hw_flag(LOONGARCH_CSR_LLBCTL);
> >> + set_gcsr_hw_flag(LOONGARCH_CSR_TLBRENTRY);
> >> + set_gcsr_hw_flag(LOONGARCH_CSR_TLBRBADV);
> >> + set_gcsr_hw_flag(LOONGARCH_CSR_TLBRERA);
> >> + set_gcsr_hw_flag(LOONGARCH_CSR_TLBRSAVE);
> >> + set_gcsr_hw_flag(LOONGARCH_CSR_TLBRELO0);
> >> + set_gcsr_hw_flag(LOONGARCH_CSR_TLBRELO1);
> >> + set_gcsr_hw_flag(LOONGARCH_CSR_TLBREHI);
> >> + set_gcsr_hw_flag(LOONGARCH_CSR_TLBRPRMD);
> >> + set_gcsr_hw_flag(LOONGARCH_CSR_DMWIN0);
> >> + set_gcsr_hw_flag(LOONGARCH_CSR_DMWIN1);
> >> + set_gcsr_hw_flag(LOONGARCH_CSR_DMWIN2);
> >> + set_gcsr_hw_flag(LOONGARCH_CSR_DMWIN3);
> >> + set_gcsr_hw_flag(LOONGARCH_CSR_MWPS);
> >> + set_gcsr_hw_flag(LOONGARCH_CSR_FWPS);
> >> +
> >> + set_gcsr_sw_flag(LOONGARCH_CSR_IMPCTL1);
> >> + set_gcsr_sw_flag(LOONGARCH_CSR_IMPCTL2);
> >> + set_gcsr_sw_flag(LOONGARCH_CSR_MERRCTL);
> >> + set_gcsr_sw_flag(LOONGARCH_CSR_MERRINFO1);
> >> + set_gcsr_sw_flag(LOONGARCH_CSR_MERRINFO2);
> >> + set_gcsr_sw_flag(LOONGARCH_CSR_MERRENTRY);
> >> + set_gcsr_sw_flag(LOONGARCH_CSR_MERRERA);
> >> + set_gcsr_sw_flag(LOONGARCH_CSR_MERRSAVE);
> >> + set_gcsr_sw_flag(LOONGARCH_CSR_CTAG);
> >> + set_gcsr_sw_flag(LOONGARCH_CSR_DEBUG);
> >> + set_gcsr_sw_flag(LOONGARCH_CSR_DERA);
> >> + set_gcsr_sw_flag(LOONGARCH_CSR_DESAVE);
> >> + set_gcsr_sw_flag(LOONGARCH_CSR_PRCFG1);
> >> + set_gcsr_sw_flag(LOONGARCH_CSR_PRCFG2);
> >> + set_gcsr_sw_flag(LOONGARCH_CSR_PRCFG3);
> >> + set_gcsr_sw_flag(LOONGARCH_CSR_PGD);
> >> + set_gcsr_sw_flag(LOONGARCH_CSR_TINTCLR);
> >> +
> >> + set_gcsr_sw_flag(LOONGARCH_CSR_FWPS);
> >> + set_gcsr_sw_flag(LOONGARCH_CSR_FWPC);
> >> + set_gcsr_sw_flag(LOONGARCH_CSR_MWPS);
> >> + set_gcsr_sw_flag(LOONGARCH_CSR_MWPC);
> >> +
> >> + set_gcsr_sw_flag(LOONGARCH_CSR_DB0ADDR);
> >> + set_gcsr_sw_flag(LOONGARCH_CSR_DB0MASK);
> >> + set_gcsr_sw_flag(LOONGARCH_CSR_DB0CTRL);
> >> + set_gcsr_sw_flag(LOONGARCH_CSR_DB0ASID);
> >> + set_gcsr_sw_flag(LOONGARCH_CSR_DB1ADDR);
> >> + set_gcsr_sw_flag(LOONGARCH_CSR_DB1MASK);
> >> + set_gcsr_sw_flag(LOONGARCH_CSR_DB1CTRL);
> >> + set_gcsr_sw_flag(LOONGARCH_CSR_DB1ASID);
> >> + set_gcsr_sw_flag(LOONGARCH_CSR_DB2ADDR);
> >> + set_gcsr_sw_flag(LOONGARCH_CSR_DB2MASK);
> >> + set_gcsr_sw_flag(LOONGARCH_CSR_DB2CTRL);
> >> + set_gcsr_sw_flag(LOONGARCH_CSR_DB2ASID);
> >> + set_gcsr_sw_flag(LOONGARCH_CSR_DB3ADDR);
> >> + set_gcsr_sw_flag(LOONGARCH_CSR_DB3MASK);
> >> + set_gcsr_sw_flag(LOONGARCH_CSR_DB3CTRL);
> >> + set_gcsr_sw_flag(LOONGARCH_CSR_DB3ASID);
> >> + set_gcsr_sw_flag(LOONGARCH_CSR_DB4ADDR);
> >> + set_gcsr_sw_flag(LOONGARCH_CSR_DB4MASK);
> >> + set_gcsr_sw_flag(LOONGARCH_CSR_DB4CTRL);
> >> + set_gcsr_sw_flag(LOONGARCH_CSR_DB4ASID);
> >> + set_gcsr_sw_flag(LOONGARCH_CSR_DB5ADDR);
> >> + set_gcsr_sw_flag(LOONGARCH_CSR_DB5MASK);
> >> + set_gcsr_sw_flag(LOONGARCH_CSR_DB5CTRL);
> >> + set_gcsr_sw_flag(LOONGARCH_CSR_DB5ASID);
> >> + set_gcsr_sw_flag(LOONGARCH_CSR_DB6ADDR);
> >> + set_gcsr_sw_flag(LOONGARCH_CSR_DB6MASK);
> >> + set_gcsr_sw_flag(LOONGARCH_CSR_DB6CTRL);
> >> + set_gcsr_sw_flag(LOONGARCH_CSR_DB6ASID);
> >> + set_gcsr_sw_flag(LOONGARCH_CSR_DB7ADDR);
> >> + set_gcsr_sw_flag(LOONGARCH_CSR_DB7MASK);
> >> + set_gcsr_sw_flag(LOONGARCH_CSR_DB7CTRL);
> >> + set_gcsr_sw_flag(LOONGARCH_CSR_DB7ASID);
> >> +
> >> + set_gcsr_sw_flag(LOONGARCH_CSR_IB0ADDR);
> >> + set_gcsr_sw_flag(LOONGARCH_CSR_IB0MASK);
> >> + set_gcsr_sw_flag(LOONGARCH_CSR_IB0CTRL);
> >> + set_gcsr_sw_flag(LOONGARCH_CSR_IB0ASID);
> >> + set_gcsr_sw_flag(LOONGARCH_CSR_IB1ADDR);
> >> + set_gcsr_sw_flag(LOONGARCH_CSR_IB1MASK);
> >> + set_gcsr_sw_flag(LOONGARCH_CSR_IB1CTRL);
> >> + set_gcsr_sw_flag(LOONGARCH_CSR_IB1ASID);
> >> + set_gcsr_sw_flag(LOONGARCH_CSR_IB2ADDR);
> >> + set_gcsr_sw_flag(LOONGARCH_CSR_IB2MASK);
> >> + set_gcsr_sw_flag(LOONGARCH_CSR_IB2CTRL);
> >> + set_gcsr_sw_flag(LOONGARCH_CSR_IB2ASID);
> >> + set_gcsr_sw_flag(LOONGARCH_CSR_IB3ADDR);
> >> + set_gcsr_sw_flag(LOONGARCH_CSR_IB3MASK);
> >> + set_gcsr_sw_flag(LOONGARCH_CSR_IB3CTRL);
> >> + set_gcsr_sw_flag(LOONGARCH_CSR_IB3ASID);
> >> + set_gcsr_sw_flag(LOONGARCH_CSR_IB4ADDR);
> >> + set_gcsr_sw_flag(LOONGARCH_CSR_IB4MASK);
> >> + set_gcsr_sw_flag(LOONGARCH_CSR_IB4CTRL);
> >> + set_gcsr_sw_flag(LOONGARCH_CSR_IB4ASID);
> >> + set_gcsr_sw_flag(LOONGARCH_CSR_IB5ADDR);
> >> + set_gcsr_sw_flag(LOONGARCH_CSR_IB5MASK);
> >> + set_gcsr_sw_flag(LOONGARCH_CSR_IB5CTRL);
> >> + set_gcsr_sw_flag(LOONGARCH_CSR_IB5ASID);
> >> + set_gcsr_sw_flag(LOONGARCH_CSR_IB6ADDR);
> >> + set_gcsr_sw_flag(LOONGARCH_CSR_IB6MASK);
> >> + set_gcsr_sw_flag(LOONGARCH_CSR_IB6CTRL);
> >> + set_gcsr_sw_flag(LOONGARCH_CSR_IB6ASID);
> >> + set_gcsr_sw_flag(LOONGARCH_CSR_IB7ADDR);
> >> + set_gcsr_sw_flag(LOONGARCH_CSR_IB7MASK);
> >> + set_gcsr_sw_flag(LOONGARCH_CSR_IB7CTRL);
> >> + set_gcsr_sw_flag(LOONGARCH_CSR_IB7ASID);
> >> +
> >> + set_gcsr_sw_flag(LOONGARCH_CSR_PERFCTRL0);
> >> + set_gcsr_sw_flag(LOONGARCH_CSR_PERFCNTR0);
> >> + set_gcsr_sw_flag(LOONGARCH_CSR_PERFCTRL1);
> >> + set_gcsr_sw_flag(LOONGARCH_CSR_PERFCNTR1);
> >> + set_gcsr_sw_flag(LOONGARCH_CSR_PERFCTRL2);
> >> + set_gcsr_sw_flag(LOONGARCH_CSR_PERFCNTR2);
> >> + set_gcsr_sw_flag(LOONGARCH_CSR_PERFCTRL3);
> >> + set_gcsr_sw_flag(LOONGARCH_CSR_PERFCNTR3);
> >> +}
> >> +
> >> +static void kvm_update_vpid(struct kvm_vcpu *vcpu, int cpu)
> >> +{
> >> + struct kvm_context *context;
> >> + unsigned long vpid;
> >> +
> >> + context = per_cpu_ptr(vcpu->kvm->arch.vmcs, cpu);
> >> + vpid = context->vpid_cache + 1;
> >> + if (!(vpid & vpid_mask)) {
> >> + /* finish round of 64 bit loop */
> >> + if (unlikely(!vpid))
> >> + vpid = vpid_mask + 1;
> >> +
> >> + /* vpid 0 reserved for root */
> >> + ++vpid;
> >> +
> >> + /* start new vpid cycle */
> >> + kvm_flush_tlb_all();
> >> + }
> >> +
> >> + context->vpid_cache = vpid;
> >> + vcpu->arch.vpid = vpid;
> >> +}
> >> +
> >> +void kvm_check_vpid(struct kvm_vcpu *vcpu)
> >> +{
> >> + struct kvm_context *context;
> >> + bool migrated;
> >> + unsigned long ver, old, vpid;
> >> + int cpu;
> >> +
> >> + cpu = smp_processor_id();
> >> + /*
> >> + * Are we entering guest context on a different CPU to last time?
> >> + * If so, the vCPU's guest TLB state on this CPU may be stale.
> >> + */
> >> + context = per_cpu_ptr(vcpu->kvm->arch.vmcs, cpu);
> >> + migrated = (vcpu->cpu != cpu);
> >> +
> >> + /*
> >> + * Check if our vpid is of an older version
> >> + *
> >> + * We also discard the stored vpid if we've executed on
> >> + * another CPU, as the guest mappings may have changed without
> >> + * hypervisor knowledge.
> >> + */
> >> + ver = vcpu->arch.vpid & ~vpid_mask;
> >> + old = context->vpid_cache & ~vpid_mask;
> >> + if (migrated || (ver != old)) {
> >> + kvm_update_vpid(vcpu, cpu);
> >> + trace_kvm_vpid_change(vcpu, vcpu->arch.vpid);
> >> + vcpu->cpu = cpu;
> >> + }
> >> +
> >> + /* Restore GSTAT(0x50).vpid */
> >> + vpid = (vcpu->arch.vpid & vpid_mask) << CSR_GSTAT_GID_SHIFT;
> >> + change_csr_gstat(vpid_mask << CSR_GSTAT_GID_SHIFT, vpid);
> >> +}
> >> +
> >> +static int kvm_loongarch_env_init(void)
> >> +{
> >> + struct kvm_context *context;
> >> + int cpu, order;
> >> + void *addr;
> >> +
> >> + vmcs = alloc_percpu(struct kvm_context);
> >> + if (!vmcs) {
> >> + pr_err("kvm: failed to allocate percpu kvm_context\n");
> >> + return -ENOMEM;
> >> + }
> >> +
> >> + kvm_loongarch_ops = kzalloc(sizeof(*kvm_loongarch_ops), GFP_KERNEL);
> >> + if (!kvm_loongarch_ops) {
> >> + free_percpu(vmcs);
> >> + vmcs = NULL;
> >> + return -ENOMEM;
> >> + }
> >> + /*
> >> + * There would be a problem in the world switch code on page
> >> + * fault re-entry, since the pgd register is shared between the
> >> + * root kernel and the kvm hypervisor. The world switch entry must
> >> + * be in an unmapped area; it cannot be in a TLB-mapped area.
> >> + * In the future, if hw pagetable walking is supported, or there
> >> + * are separate pgd registers for the root kernel and the kvm
> >> + * hypervisor, copying the world switch code will not be needed.
> >> + */
> >> +
> >> + order = get_order(kvm_vector_size + kvm_enter_guest_size);
> >> + addr = (void *)__get_free_pages(GFP_KERNEL, order);
> >> + if (!addr) {
> >> + free_percpu(vmcs);
> >> + vmcs = NULL;
> >> + kfree(kvm_loongarch_ops);
> >> + kvm_loongarch_ops = NULL;
> >> + return -ENOMEM;
> >> + }
> >> +
> >> + memcpy(addr, kvm_vector_entry, kvm_vector_size);
> >> + memcpy(addr + kvm_vector_size, kvm_enter_guest, kvm_enter_guest_size);
> > Why memcpy? In our internal repo, we use kvm_vector_entry and
> > kvm_enter_guest directly. The long comment above makes me nervous
> > because Loongson-3A6000 already supports a hardware page table walker.
> >
> > Huacai
> As mentioned in the comment, this memcpy is not needed when hardware page
> table walking is supported, as on the 3A6000.
But then why does KVM still work on Loongson-3A5000 in our internal
repo, where we don't use memcpy()?

Huacai

>
> Thanks
> Tianrui Zhao
> >
> >> + flush_icache_range((unsigned long)addr, (unsigned long)addr +
> >> + kvm_vector_size + kvm_enter_guest_size);
> >> + kvm_loongarch_ops->guest_eentry = addr;
> >> + kvm_loongarch_ops->enter_guest = addr + kvm_vector_size;
> >> + kvm_loongarch_ops->page_order = order;
> >> +
> >> + vpid_mask = read_csr_gstat();
> >> + vpid_mask = (vpid_mask & CSR_GSTAT_GIDBIT) >> CSR_GSTAT_GIDBIT_SHIFT;
> >> + if (vpid_mask)
> >> + vpid_mask = GENMASK(vpid_mask - 1, 0);
> >> +
> >> + for_each_possible_cpu(cpu) {
> >> + context = per_cpu_ptr(vmcs, cpu);
> >> + context->vpid_cache = vpid_mask + 1;
> >> + context->last_vcpu = NULL;
> >> + }
> >> +
> >> + kvm_init_fault();
> >> + kvm_init_gcsr_flag();
> >> +
> >> + return 0;
> >> +}
> >> +
> >> +static void kvm_loongarch_env_exit(void)
> >> +{
> >> + unsigned long addr;
> >> +
> >> + if (vmcs)
> >> + free_percpu(vmcs);
> >> +
> >> + if (kvm_loongarch_ops) {
> >> + if (kvm_loongarch_ops->guest_eentry) {
> >> + addr = (unsigned long)kvm_loongarch_ops->guest_eentry;
> >> + free_pages(addr, kvm_loongarch_ops->page_order);
> >> + }
> >> + kfree(kvm_loongarch_ops);
> >> + }
> >> +}
> >> +
> >> +static int kvm_loongarch_init(void)
> >> +{
> >> + int r;
> >> +
> >> + if (!cpu_has_lvz) {
> >> + kvm_info("hardware virtualization not available\n");
> >> + return -ENODEV;
> >> + }
> >> + r = kvm_loongarch_env_init();
> >> + if (r)
> >> + return r;
> >> +
> >> + return kvm_init(sizeof(struct kvm_vcpu), 0, THIS_MODULE);
> >> +}
> >> +
> >> +static void kvm_loongarch_exit(void)
> >> +{
> >> + kvm_exit();
> >> + kvm_loongarch_env_exit();
> >> +}
> >> +
> >> +module_init(kvm_loongarch_init);
> >> +module_exit(kvm_loongarch_exit);
> >> +
> >> +#ifdef MODULE
> >> +static const struct cpu_feature loongarch_kvm_feature[] = {
> >> + { .feature = cpu_feature(LOONGARCH_LVZ) },
> >> + {},
> >> +};
> >> +MODULE_DEVICE_TABLE(cpu, loongarch_kvm_feature);
> >> +#endif
> >> --
> >> 2.39.1
> >>
>
>