Re: [PATCH v2 2/3] x86/xen/time: setup vcpu 0 time info page

From: Juergen Gross
Date: Tue Sep 26 2017 - 06:05:51 EST


On 26/09/17 11:57, Joao Martins wrote:
> On 09/26/2017 10:32 AM, Juergen Gross wrote:
>> On 22/09/17 18:25, Joao Martins wrote:
> [snip]
>>> +static void xen_setup_vsyscall_time_info(void)
>>> +{
>>> + struct vcpu_register_time_memory_area t;
>>> + struct pvclock_vsyscall_time_info *ti;
>>> + struct pvclock_vcpu_time_info *pvti;
>>> + int ret;
>>> +
>>> + pvti = &__this_cpu_read(xen_vcpu)->time;
>>> +
>>> + /*
>>> + * We check ahead on the primary time info whether this
>>> + * bit is supported, hence speeding up the Xen clocksource.
>>> + */
>>> + if (!(pvti->flags & PVCLOCK_TSC_STABLE_BIT))
>>> + return;
>>> +
>>> + pvclock_set_flags(PVCLOCK_TSC_STABLE_BIT);
>>> +
>>> + ti = (struct pvclock_vsyscall_time_info *) get_zeroed_page(GFP_KERNEL);
>>
>> Coding style: omit the blank after the cast.
>>
> OK.
>
>>> + if (!ti)
>>> + return;
>>> +
>>> + t.addr.v = &ti->pvti;
>>> +
>>> + ret = HYPERVISOR_vcpu_op(VCPUOP_register_vcpu_time_memory_area, 0, &t);
>>> + if (ret) {
>>> + pr_notice("xen: VCLOCK_PVCLOCK not supported (err %d)\n", ret);
>>> + free_page((unsigned long) ti);
>>
>> Coding style again, once more below.
>>
> OK.
>
>>> + return;
>>> + }
>>> +
>>> + /*
>>> + * If the check above succeeded this one should too, since it's the
>>> + * same data on both primary and secondary time infos, just in different
>>> + * memory regions. But we still check it in case the hypervisor is buggy.
>>> + */
>>> + pvti = &ti->pvti;
>>> + if (!(pvti->flags & PVCLOCK_TSC_STABLE_BIT)) {
>>> + t.addr.v = NULL;
>>> + ret = HYPERVISOR_vcpu_op(VCPUOP_register_vcpu_time_memory_area,
>>> + 0, &t);
>>> + if (!ret)
>>> + free_page((unsigned long) ti);
>>> +
>>> + pr_notice("xen: VCLOCK_PVCLOCK not supported (err %d)\n", ret);
>>
>> Mind making the message more descriptive? E.g. instead of reporting
>> "(err 0)" just saying "(tsc unstable)"?
>>
> Got it.
>
>>> + return;
>>> + }
>>> +
>>> + xen_clock = ti;
>>> + pvclock_set_pvti_cpu0_va(xen_clock);
>>> +
>>> + xen_clocksource.archdata.vclock_mode = VCLOCK_PVCLOCK;
>>> +}
>>> +
>>> static void __init xen_time_init(void)
>>> {
>>> int cpu = smp_processor_id();
>>> @@ -396,6 +495,7 @@ static void __init xen_time_init(void)
>>> setup_force_cpu_cap(X86_FEATURE_TSC);
>>>
>>> xen_setup_runstate_info(cpu);
>>> + xen_setup_vsyscall_time_info();
>>> xen_setup_timer(cpu);
>>> xen_setup_cpu_clockevents();
>>>
>>> diff --git a/arch/x86/xen/xen-ops.h b/arch/x86/xen/xen-ops.h
>>> index c8a6d224f7ed..f96dbedb33d4 100644
>>> --- a/arch/x86/xen/xen-ops.h
>>> +++ b/arch/x86/xen/xen-ops.h
>>> @@ -69,6 +69,8 @@ void xen_setup_runstate_info(int cpu);
>>> void xen_teardown_timer(int cpu);
>>> u64 xen_clocksource_read(void);
>>> void xen_setup_cpu_clockevents(void);
>>> +void xen_save_time_memory_area(void);
>>> +void xen_restore_time_memory_area(void);
>>> void __init xen_init_time_ops(void);
>>> void __init xen_hvm_init_time_ops(void);
>>>
>>> diff --git a/include/xen/interface/vcpu.h b/include/xen/interface/vcpu.h
>>> index 98188c87f5c1..8da788c5bd4f 100644
>>> --- a/include/xen/interface/vcpu.h
>>> +++ b/include/xen/interface/vcpu.h
>>> @@ -178,4 +178,32 @@ DEFINE_GUEST_HANDLE_STRUCT(vcpu_register_vcpu_info);
>>>
>>> /* Send an NMI to the specified VCPU. @extra_arg == NULL. */
>>> #define VCPUOP_send_nmi 11
>>> +
>>> +/*
>>> + * Register a memory location to get a secondary copy of the vcpu time
>>> + * parameters. The master copy still exists as part of the vcpu shared
>>> + * memory area, and this secondary copy is updated whenever the master copy
>>> + * is updated (and using the same versioning scheme for synchronisation).
>>> + *
>>> + * The intent is that this copy may be mapped (RO) into userspace so
>>> + * that usermode can compute system time using the time info and the
>>> + * tsc. Usermode will see an array of vcpu_time_info structures, one
>>> + * for each vcpu, and choose the right one by an existing mechanism
>>> + * which allows it to get the current vcpu number (such as via a
>>> + * segment limit). It can then apply the normal algorithm to compute
>>> + * system time from the tsc.
>>> + *
>>> + * @extra_arg == pointer to vcpu_register_time_memory_area structure.
>>> + */
>>> +#define VCPUOP_register_vcpu_time_memory_area 13
>>> +DEFINE_GUEST_HANDLE_STRUCT(vcpu_time_info_t);
>>> +struct vcpu_register_time_memory_area {
>>> + union {
>>> + GUEST_HANDLE(vcpu_time_info_t) h;
>>> + struct pvclock_vcpu_time_info *v;
>>> + uint64_t p;
>>> + } addr;
>>> +};
>>> +DEFINE_GUEST_HANDLE_STRUCT(vcpu_register_time_memory_area_t);
>>> +
>>
>> Instead of adding only the operation you need, maybe you could sync just
>> the complete header from Xen?
>
> Yep - I will update it. I suppose this means only adding VCPUOP_get_physid -
> the rest seems to be up-to-date AFAICT.

Correct.


Juergen