Re: [PATCH v1 02/12] xen/hvmlite: Factor out common kernel init code

From: Luis R. Rodriguez
Date: Fri Jan 22 2016 - 18:01:52 EST


On Fri, Jan 22, 2016 at 04:35:48PM -0500, Boris Ostrovsky wrote:
> HVMlite guests (to be introduced in subsequent patches) share most
> of the kernel initialization code with PV(H).
>
> Signed-off-by: Boris Ostrovsky <boris.ostrovsky@xxxxxxxxxx>
> ---
> arch/x86/xen/enlighten.c | 225 ++++++++++++++++++++++++----------------------
> 1 files changed, 119 insertions(+), 106 deletions(-)
>
> diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c
> index d09e4c9..2cf446a 100644
> --- a/arch/x86/xen/enlighten.c
> +++ b/arch/x86/xen/enlighten.c
> @@ -1505,19 +1505,14 @@ static void __init xen_pvh_early_guest_init(void)
> }
> #endif /* CONFIG_XEN_PVH */
>
> -/* First C function to be called on Xen boot */
> -asmlinkage __visible void __init xen_start_kernel(void)
> +
> +static void __init xen_init_kernel(void)
> {
> struct physdev_set_iopl set_iopl;
> unsigned long initrd_start = 0;
> u64 pat;
> int rc;
>
> - if (!xen_start_info)
> - return;
> -
> - xen_domain_type = XEN_PV_DOMAIN;
> -
> xen_setup_features();
> #ifdef CONFIG_XEN_PVH
> xen_pvh_early_guest_init();
> @@ -1529,48 +1524,9 @@ asmlinkage __visible void __init xen_start_kernel(void)
> if (xen_initial_domain())
> pv_info.features |= PV_SUPPORTED_RTC;
> pv_init_ops = xen_init_ops;
> - if (!xen_pvh_domain()) {
> - pv_cpu_ops = xen_cpu_ops;
> -
> - x86_platform.get_nmi_reason = xen_get_nmi_reason;
> - }
> -
> - if (xen_feature(XENFEAT_auto_translated_physmap))
> - x86_init.resources.memory_setup = xen_auto_xlated_memory_setup;
> - else
> - x86_init.resources.memory_setup = xen_memory_setup;
> - x86_init.oem.arch_setup = xen_arch_setup;
> - x86_init.oem.banner = xen_banner;
>
> xen_init_time_ops();
>
> - /*
> - * Set up some pagetable state before starting to set any ptes.
> - */
> -
> - xen_init_mmu_ops();
> -
> - /* Prevent unwanted bits from being set in PTEs. */
> - __supported_pte_mask &= ~_PAGE_GLOBAL;
> -
> - /*
> - * Prevent page tables from being allocated in highmem, even
> - * if CONFIG_HIGHPTE is enabled.
> - */
> - __userpte_alloc_gfp &= ~__GFP_HIGHMEM;
> -
> - /* Work out if we support NX */
> - x86_configure_nx();
> -
> - /* Get mfn list */
> - xen_build_dynamic_phys_to_machine();
> -
> - /*
> - * Set up kernel GDT and segment registers, mainly so that
> - * -fstack-protector code can be executed.
> - */
> - xen_setup_gdt(0);
> -
> xen_init_irq_ops();
> xen_init_cpuid_mask();
>
> @@ -1580,21 +1536,8 @@ asmlinkage __visible void __init xen_start_kernel(void)
> */
> xen_init_apic();
> #endif
> -
> - if (xen_feature(XENFEAT_mmu_pt_update_preserve_ad)) {
> - pv_mmu_ops.ptep_modify_prot_start = xen_ptep_modify_prot_start;
> - pv_mmu_ops.ptep_modify_prot_commit = xen_ptep_modify_prot_commit;
> - }
> -
> machine_ops = xen_machine_ops;
>
> - /*
> - * The only reliable way to retain the initial address of the
> - * percpu gdt_page is to remember it here, so we can go and
> - * mark it RW later, when the initial percpu area is freed.
> - */
> - xen_initial_gdt = &per_cpu(gdt_page, 0);
> -
> xen_smp_init();
>
> #ifdef CONFIG_ACPI_NUMA
> @@ -1609,66 +1552,126 @@ asmlinkage __visible void __init xen_start_kernel(void)
> possible map and a non-dummy shared_info. */
> per_cpu(xen_vcpu, 0) = &HYPERVISOR_shared_info->vcpu_info[0];
>
> - local_irq_disable();
> - early_boot_irqs_disabled = true;
> + if (!xen_hvm_domain()) {
> + if (!xen_pvh_domain()) {
> + pv_cpu_ops = xen_cpu_ops;
>
> - xen_raw_console_write("mapping kernel into physical memory\n");
> - xen_setup_kernel_pagetable((pgd_t *)xen_start_info->pt_base,
> - xen_start_info->nr_pages);
> - xen_reserve_special_pages();
> + x86_platform.get_nmi_reason = xen_get_nmi_reason;
> + }
>
> - /*
> - * Modify the cache mode translation tables to match Xen's PAT
> - * configuration.
> - */
> - rdmsrl(MSR_IA32_CR_PAT, pat);
> - pat_init_cache_modes(pat);
> + if (xen_feature(XENFEAT_auto_translated_physmap))
> + x86_init.resources.memory_setup =
> + xen_auto_xlated_memory_setup;
> + else
> + x86_init.resources.memory_setup = xen_memory_setup;
> + x86_init.oem.arch_setup = xen_arch_setup;
> + x86_init.oem.banner = xen_banner;
>
> - /* keep using Xen gdt for now; no urgent need to change it */
> + /*
> + * Set up some pagetable state before starting to set any ptes.
> + */
>
> -#ifdef CONFIG_X86_32
> - pv_info.kernel_rpl = 1;
> - if (xen_feature(XENFEAT_supervisor_mode_kernel))
> - pv_info.kernel_rpl = 0;
> -#else
> - pv_info.kernel_rpl = 0;
> -#endif
> - /* set the limit of our address space */
> - xen_reserve_top();
> + xen_init_mmu_ops();
> +
> + /* Prevent unwanted bits from being set in PTEs. */
> + __supported_pte_mask &= ~_PAGE_GLOBAL;
>
> - /* PVH: runs at default kernel iopl of 0 */
> - if (!xen_pvh_domain()) {
> /*
> - * We used to do this in xen_arch_setup, but that is too late
> - * on AMD were early_cpu_init (run before ->arch_setup()) calls
> - * early_amd_init which pokes 0xcf8 port.
> + * Prevent page tables from being allocated in highmem, even
> + * if CONFIG_HIGHPTE is enabled.
> */
> - set_iopl.iopl = 1;
> - rc = HYPERVISOR_physdev_op(PHYSDEVOP_set_iopl, &set_iopl);
> - if (rc != 0)
> - xen_raw_printk("physdev_op failed %d\n", rc);
> - }
> + __userpte_alloc_gfp &= ~__GFP_HIGHMEM;
> +
> + /* Work out if we support NX */
> + x86_configure_nx();
> +
> + /* Get mfn list */
> + xen_build_dynamic_phys_to_machine();
> +
> + /*
> + * Set up kernel GDT and segment registers, mainly so that
> + * -fstack-protector code can be executed.
> + */
> + xen_setup_gdt(0);
> +
> + if (xen_feature(XENFEAT_mmu_pt_update_preserve_ad)) {
> + pv_mmu_ops.ptep_modify_prot_start =
> + xen_ptep_modify_prot_start;
> + pv_mmu_ops.ptep_modify_prot_commit =
> + xen_ptep_modify_prot_commit;
> + }
> +
> + /*
> + * The only reliable way to retain the initial address of the
> + * percpu gdt_page is to remember it here, so we can go and
> + * mark it RW later, when the initial percpu area is freed.
> + */
> + xen_initial_gdt = &per_cpu(gdt_page, 0);
> +
> + xen_raw_console_write("mapping kernel into physical memory\n");
> + xen_setup_kernel_pagetable((pgd_t *)xen_start_info->pt_base,
> + xen_start_info->nr_pages);
> + xen_reserve_special_pages();
> +
> + /*
> + * Modify the cache mode translation tables to match Xen's PAT
> + * configuration.
> + */
> + rdmsrl(MSR_IA32_CR_PAT, pat);
> + pat_init_cache_modes(pat);
> +
> + /* keep using Xen gdt for now; no urgent need to change it */
>
> #ifdef CONFIG_X86_32
> - /* set up basic CPUID stuff */
> - cpu_detect(&new_cpu_data);
> - set_cpu_cap(&new_cpu_data, X86_FEATURE_FPU);
> - new_cpu_data.wp_works_ok = 1;
> - new_cpu_data.x86_capability[0] = cpuid_edx(1);
> + pv_info.kernel_rpl = 1;
> + if (xen_feature(XENFEAT_supervisor_mode_kernel))
> + pv_info.kernel_rpl = 0;
> +#else
> + pv_info.kernel_rpl = 0;
> +#endif
> + /* set the limit of our address space */
> + xen_reserve_top();
> +
> + /* PVH: runs at default kernel iopl of 0 */
> + if (!xen_pvh_domain()) {
> + /*
> + * We used to do this in xen_arch_setup, but that is
> + * too late on AMD were early_cpu_init (run before
> + * ->arch_setup()) calls early_amd_init which pokes
> + * 0xcf8 port.
> + */
> + set_iopl.iopl = 1;
> + rc = HYPERVISOR_physdev_op(PHYSDEVOP_set_iopl,
> + &set_iopl);
> + if (rc != 0)
> + xen_raw_printk("physdev_op failed %d\n", rc);
> + }
> +
> +#ifdef CONFIG_X86_32
> + /* set up basic CPUID stuff */
> + cpu_detect(&new_cpu_data);
> + set_cpu_cap(&new_cpu_data, X86_FEATURE_FPU);
> + new_cpu_data.wp_works_ok = 1;
> + new_cpu_data.x86_capability[0] = cpuid_edx(1);
> #endif

Whoa, I'm lost, its hard for me to tell what exactly stayed and what
got pulled into a helper, etc. Is there a possibility to split this
patch in 2 somehow to make the actual functional changes easier to
read? There are too many changes here and I just can't tell easily
what's going on.

Luis