[PATCH v2 04/14] KVM: x86: use atomic_t instead of pit.inject_lock

From: Radim KrÄmÃÅ
Date: Wed Feb 17 2016 - 14:15:25 EST


The lock was an overkill, the same can be done with atomics.

A mb() was added in kvm_pit_ack_irq, to pair with implicit barrier
between pit_timer_fn and pit_do_work. The mb() prevents a race that
could happen if pending == 0 and irq_ack == 0:

kvm_pit_ack_irq: | pit_timer_fn:
p = atomic_read(&ps->pending); |
| atomic_inc(&ps->pending);
| queue_work(pit_do_work);
| pit_do_work:
| atomic_xchg(&ps->irq_ack, 0);
| return;
atomic_set(&ps->irq_ack, 1); |
if (p == 0) return; |

where the interrupt would not be delivered in this tick of pit_timer_fn.
PIT would have eventually delivered the interrupt, but we sacrifice
perofmance to make sure that interrupts are not needlessly delayed.

sfence isn't enough: atomic_dec_if_positive does atomic_read first and
x86 can reorder loads before stores. lfence isn't enough: store can
pass lfence, turning it into a nop.
A compiler barrier would be more than enough as fences would matter only
if the CPU stalled for unbelievably long.

This patch doesn't do anything in kvm_pit_reset_reinject, because any
order of resets can race, but the result differs by at most one
interrupt, which is ok, because it's the same result as if the reset
happened at a slightly different time. (Original code didn't protect
the reset path with a proper lock, so users have to be robust.)

Signed-off-by: Radim KrÄmÃÅ <rkrcmar@xxxxxxxxxx>
---
v2:
* Add and comment memory barriers. [Paolo]
* Adapt to new contexts.

arch/x86/kvm/i8254.c | 56 +++++++++++++++++++++-------------------------------
arch/x86/kvm/i8254.h | 3 +--
2 files changed, 24 insertions(+), 35 deletions(-)

diff --git a/arch/x86/kvm/i8254.c b/arch/x86/kvm/i8254.c
index bdbb3f076e72..cbb911728ac8 100644
--- a/arch/x86/kvm/i8254.c
+++ b/arch/x86/kvm/i8254.c
@@ -237,11 +237,13 @@ static void kvm_pit_ack_irq(struct kvm_irq_ack_notifier *kian)
struct kvm_kpit_state *ps = container_of(kian, struct kvm_kpit_state,
irq_ack_notifier);

- spin_lock(&ps->inject_lock);
+ atomic_set(&ps->irq_ack, 1);
+ /* irq_ack should be set before pending is read. Order accesses with
+ * inc(pending) in pit_timer_fn and xchg(irq_ack, 0) in pit_do_work.
+ */
+ smp_mb();
if (atomic_dec_if_positive(&ps->pending) > 0 && ps->reinject)
queue_kthread_work(&ps->pit->worker, &ps->pit->expired);
- ps->irq_ack = 1;
- spin_unlock(&ps->inject_lock);
}

void __kvm_migrate_pit_timer(struct kvm_vcpu *vcpu)
@@ -272,36 +274,25 @@ static void pit_do_work(struct kthread_work *work)
struct kvm_vcpu *vcpu;
int i;
struct kvm_kpit_state *ps = &pit->pit_state;
- int inject = 0;

- /* Try to inject pending interrupts when
- * last one has been acked.
+ if (ps->reinject && !atomic_xchg(&ps->irq_ack, 0))
+ return;
+
+ kvm_set_irq(kvm, kvm->arch.vpit->irq_source_id, 0, 1, false);
+ kvm_set_irq(kvm, kvm->arch.vpit->irq_source_id, 0, 0, false);
+
+ /*
+ * Provides NMI watchdog support via Virtual Wire mode.
+ * The route is: PIT -> LVT0 in NMI mode.
+ *
+ * Note: Our Virtual Wire implementation does not follow
+ * the MP specification. We propagate a PIT interrupt to all
+ * VCPUs and only when LVT0 is in NMI mode. The interrupt can
+ * also be simultaneously delivered through PIC and IOAPIC.
*/
- spin_lock(&ps->inject_lock);
- if (!ps->reinject)
- inject = 1;
- else if (ps->irq_ack) {
- ps->irq_ack = 0;
- inject = 1;
- }
- spin_unlock(&ps->inject_lock);
- if (inject) {
- kvm_set_irq(kvm, kvm->arch.vpit->irq_source_id, 0, 1, false);
- kvm_set_irq(kvm, kvm->arch.vpit->irq_source_id, 0, 0, false);
-
- /*
- * Provides NMI watchdog support via Virtual Wire mode.
- * The route is: PIT -> PIC -> LVT0 in NMI mode.
- *
- * Note: Our Virtual Wire implementation is simplified, only
- * propagating PIT interrupts to all VCPUs when they have set
- * LVT0 to NMI delivery. Other PIC interrupts are just sent to
- * VCPU0, and only if its LVT0 is in EXTINT mode.
- */
- if (atomic_read(&kvm->arch.vapics_in_nmi_mode) > 0)
- kvm_for_each_vcpu(i, vcpu, kvm)
- kvm_apic_nmi_wd_deliver(vcpu);
- }
+ if (atomic_read(&kvm->arch.vapics_in_nmi_mode) > 0)
+ kvm_for_each_vcpu(i, vcpu, kvm)
+ kvm_apic_nmi_wd_deliver(vcpu);
}

static enum hrtimer_restart pit_timer_fn(struct hrtimer *data)
@@ -324,7 +315,7 @@ static enum hrtimer_restart pit_timer_fn(struct hrtimer *data)
static inline void kvm_pit_reset_reinject(struct kvm_pit *pit)
{
atomic_set(&pit->pit_state.pending, 0);
- pit->pit_state.irq_ack = 1;
+ atomic_set(&pit->pit_state.irq_ack, 1);
}

static void create_pit_timer(struct kvm *kvm, u32 val, int is_period)
@@ -691,7 +682,6 @@ struct kvm_pit *kvm_create_pit(struct kvm *kvm, u32 flags)

mutex_init(&pit->pit_state.lock);
mutex_lock(&pit->pit_state.lock);
- spin_lock_init(&pit->pit_state.inject_lock);

pid = get_pid(task_tgid(current));
pid_nr = pid_vnr(pid);
diff --git a/arch/x86/kvm/i8254.h b/arch/x86/kvm/i8254.h
index c84990b42b5b..f8cf4b84f435 100644
--- a/arch/x86/kvm/i8254.h
+++ b/arch/x86/kvm/i8254.h
@@ -33,8 +33,7 @@ struct kvm_kpit_state {
u32 speaker_data_on;
struct mutex lock;
struct kvm_pit *pit;
- spinlock_t inject_lock;
- unsigned long irq_ack;
+ atomic_t irq_ack;
struct kvm_irq_ack_notifier irq_ack_notifier;
};

--
2.7.1