Re: [PATCH v12 04/11] kvm/x86: remove kvm memblock dependency

From: Paolo Bonzini
Date: Thu Jul 05 2018 - 12:12:13 EST


On 21/06/2018 23:25, Pavel Tatashin wrote:
> KVM clock is initialized later compared to other hypervisor because it has
> dependency on memblock allocator.
>
> Lets bring it inline with other hypervisors by removing this dependency by
> using memory from BSS instead of allocating it.
>
> The benefits:
> - remove ifdef from common code
> - earlier availability of TSC.
> - remove dependency on memblock, and reduce code
> - earlier kvm sched_clock()
>
> Signed-off-by: Pavel Tatashin <pasha.tatashin@xxxxxxxxxx>

The reason for this is to avoid wasting a lot of BSS memory when KVM is
not in use. Thomas is going to send his take on this!

Paolo

> ---
> arch/x86/kernel/kvm.c | 1 +
> arch/x86/kernel/kvmclock.c | 64 ++++++--------------------------------
> arch/x86/kernel/setup.c | 7 ++---
> 3 files changed, 12 insertions(+), 60 deletions(-)
>
> diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c
> index 5b2300b818af..c65c232d3ddd 100644
> --- a/arch/x86/kernel/kvm.c
> +++ b/arch/x86/kernel/kvm.c
> @@ -628,6 +628,7 @@ const __initconst struct hypervisor_x86 x86_hyper_kvm = {
> .name = "KVM",
> .detect = kvm_detect,
> .type = X86_HYPER_KVM,
> + .init.init_platform = kvmclock_init,
> .init.guest_late_init = kvm_guest_init,
> .init.x2apic_available = kvm_para_available,
> };
> diff --git a/arch/x86/kernel/kvmclock.c b/arch/x86/kernel/kvmclock.c
> index bf8d1eb7fca3..01558d41ec2c 100644
> --- a/arch/x86/kernel/kvmclock.c
> +++ b/arch/x86/kernel/kvmclock.c
> @@ -23,9 +23,9 @@
> #include <asm/apic.h>
> #include <linux/percpu.h>
> #include <linux/hardirq.h>
> -#include <linux/memblock.h>
> #include <linux/sched.h>
> #include <linux/sched/clock.h>
> +#include <linux/mm.h>
>
> #include <asm/mem_encrypt.h>
> #include <asm/x86_init.h>
> @@ -44,6 +44,11 @@ static int parse_no_kvmclock(char *arg)
> }
> early_param("no-kvmclock", parse_no_kvmclock);
>
> +/* Aligned to page sizes to match whats mapped via vsyscalls to userspace */
> +#define HV_CLOCK_SIZE (sizeof(struct pvclock_vsyscall_time_info) * NR_CPUS)
> +#define WALL_CLOCK_SIZE (sizeof(struct pvclock_wall_clock))
> +static u8 hv_clock_mem[PAGE_ALIGN(HV_CLOCK_SIZE)] __aligned(PAGE_SIZE);
> +static u8 wall_clock_mem[PAGE_ALIGN(WALL_CLOCK_SIZE)] __aligned(PAGE_SIZE);
> /* The hypervisor will put information about time periodically here */
> static struct pvclock_vsyscall_time_info *hv_clock;
> static struct pvclock_wall_clock *wall_clock;
> @@ -244,43 +249,12 @@ static void kvm_shutdown(void)
> native_machine_shutdown();
> }
>
> -static phys_addr_t __init kvm_memblock_alloc(phys_addr_t size,
> - phys_addr_t align)
> -{
> - phys_addr_t mem;
> -
> - mem = memblock_alloc(size, align);
> - if (!mem)
> - return 0;
> -
> - if (sev_active()) {
> - if (early_set_memory_decrypted((unsigned long)__va(mem), size))
> - goto e_free;
> - }
> -
> - return mem;
> -e_free:
> - memblock_free(mem, size);
> - return 0;
> -}
> -
> -static void __init kvm_memblock_free(phys_addr_t addr, phys_addr_t size)
> -{
> - if (sev_active())
> - early_set_memory_encrypted((unsigned long)__va(addr), size);
> -
> - memblock_free(addr, size);
> -}
> -
> void __init kvmclock_init(void)
> {
> struct pvclock_vcpu_time_info *vcpu_time;
> - unsigned long mem, mem_wall_clock;
> - int size, cpu, wall_clock_size;
> + int cpu;
> u8 flags;
>
> - size = PAGE_ALIGN(sizeof(struct pvclock_vsyscall_time_info)*NR_CPUS);
> -
> if (!kvm_para_available())
> return;
>
> @@ -290,28 +264,11 @@ void __init kvmclock_init(void)
> } else if (!(kvmclock && kvm_para_has_feature(KVM_FEATURE_CLOCKSOURCE)))
> return;
>
> - wall_clock_size = PAGE_ALIGN(sizeof(struct pvclock_wall_clock));
> - mem_wall_clock = kvm_memblock_alloc(wall_clock_size, PAGE_SIZE);
> - if (!mem_wall_clock)
> - return;
> -
> - wall_clock = __va(mem_wall_clock);
> - memset(wall_clock, 0, wall_clock_size);
> -
> - mem = kvm_memblock_alloc(size, PAGE_SIZE);
> - if (!mem) {
> - kvm_memblock_free(mem_wall_clock, wall_clock_size);
> - wall_clock = NULL;
> - return;
> - }
> -
> - hv_clock = __va(mem);
> - memset(hv_clock, 0, size);
> + wall_clock = (struct pvclock_wall_clock *)wall_clock_mem;
> + hv_clock = (struct pvclock_vsyscall_time_info *)hv_clock_mem;
>
> if (kvm_register_clock("primary cpu clock")) {
> hv_clock = NULL;
> - kvm_memblock_free(mem, size);
> - kvm_memblock_free(mem_wall_clock, wall_clock_size);
> wall_clock = NULL;
> return;
> }
> @@ -354,13 +311,10 @@ int __init kvm_setup_vsyscall_timeinfo(void)
> int cpu;
> u8 flags;
> struct pvclock_vcpu_time_info *vcpu_time;
> - unsigned int size;
>
> if (!hv_clock)
> return 0;
>
> - size = PAGE_ALIGN(sizeof(struct pvclock_vsyscall_time_info)*NR_CPUS);
> -
> cpu = get_cpu();
>
> vcpu_time = &hv_clock[cpu].pvti;
> diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c
> index 403b2d2c31d2..01fcc8bf7c8f 100644
> --- a/arch/x86/kernel/setup.c
> +++ b/arch/x86/kernel/setup.c
> @@ -1014,6 +1014,8 @@ void __init setup_arch(char **cmdline_p)
> */
> init_hypervisor_platform();
>
> + tsc_early_delay_calibrate();
> +
> x86_init.resources.probe_roms();
>
> /* after parse_early_param, so could debug it */
> @@ -1199,11 +1201,6 @@ void __init setup_arch(char **cmdline_p)
>
> memblock_find_dma_reserve();
>
> -#ifdef CONFIG_KVM_GUEST
> - kvmclock_init();
> -#endif
> -
> - tsc_early_delay_calibrate();
> if (!early_xdbc_setup_hardware())
> early_xdbc_register_console();
>
>