[PATCH v2 01/23] KVM: arm64: Add tracepoints + stats for LPI cache effectiveness

From: Oliver Upton
Date: Tue Feb 13 2024 - 04:33:37 EST


LPI translation and injection have been shown to have a significant
impact on the performance of VM workloads, so it probably makes sense to
add some signals in this area.

Introduce the concept of a KVM tracepoint that associates with a VM
stat and use it for the LPI translation cache tracepoints. It isn't too
uncommon for a kernel hacker to attach to tracepoints, while at the same
time userspace may open a 'binary stats' FD to peek at the corresponding
VM stats.

Signed-off-by: Oliver Upton <oliver.upton@xxxxxxxxx>
---
arch/arm64/include/asm/kvm_host.h | 3 ++
arch/arm64/kvm/guest.c | 5 ++-
arch/arm64/kvm/vgic/trace.h | 66 +++++++++++++++++++++++++++++++
arch/arm64/kvm/vgic/vgic-its.c | 14 ++++++-
include/linux/kvm_host.h | 4 ++
5 files changed, 89 insertions(+), 3 deletions(-)

diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h
index 21c57b812569..6f88b76373a5 100644
--- a/arch/arm64/include/asm/kvm_host.h
+++ b/arch/arm64/include/asm/kvm_host.h
@@ -966,6 +966,9 @@ static inline bool __vcpu_write_sys_reg_to_cpu(u64 val, int reg)

struct kvm_vm_stat {
struct kvm_vm_stat_generic generic;
+ u64 vgic_its_trans_cache_hit; /* MSI lookup in the translation cache returned an IRQ */
+ u64 vgic_its_trans_cache_miss; /* MSI lookup in the translation cache returned nothing */
+ u64 vgic_its_trans_cache_victim; /* cache entry still holding an IRQ was replaced */
};

struct kvm_vcpu_stat {
diff --git a/arch/arm64/kvm/guest.c b/arch/arm64/kvm/guest.c
index aaf1d4939739..354d67251fc2 100644
--- a/arch/arm64/kvm/guest.c
+++ b/arch/arm64/kvm/guest.c
@@ -30,7 +30,10 @@
#include "trace.h"

const struct _kvm_stats_desc kvm_vm_stats_desc[] = {
- KVM_GENERIC_VM_STATS()
+ KVM_GENERIC_VM_STATS(),
+ STATS_DESC_COUNTER(VM, vgic_its_trans_cache_hit), /* these three are bumped via KVM_VM_TRACE_EVENT() in vgic-its.c */
+ STATS_DESC_COUNTER(VM, vgic_its_trans_cache_miss),
+ STATS_DESC_COUNTER(VM, vgic_its_trans_cache_victim)
};

const struct kvm_stats_header kvm_vm_stats_header = {
diff --git a/arch/arm64/kvm/vgic/trace.h b/arch/arm64/kvm/vgic/trace.h
index 83c64401a7fc..ff6423f22c91 100644
--- a/arch/arm64/kvm/vgic/trace.h
+++ b/arch/arm64/kvm/vgic/trace.h
@@ -27,6 +27,72 @@ TRACE_EVENT(vgic_update_irq_pending,
__entry->vcpu_id, __entry->irq, __entry->level)
);

+TRACE_EVENT(vgic_its_trans_cache_hit, /* fired when vgic_its_check_cache() returns an IRQ for an MSI */
+ TP_PROTO(__u64 db_addr, __u32 device_id, __u32 event_id, __u32 intid),
+ TP_ARGS(db_addr, device_id, event_id, intid),
+
+ TP_STRUCT__entry(
+ __field( __u64, db_addr ) /* MSI address: address_hi << 32 | address_lo */
+ __field( __u32, device_id ) /* msi->devid */
+ __field( __u32, event_id ) /* msi->data */
+ __field( __u32, intid ) /* intid of the IRQ found in the cache */
+ ),
+
+ TP_fast_assign(
+ __entry->db_addr = db_addr;
+ __entry->device_id = device_id;
+ __entry->event_id = event_id;
+ __entry->intid = intid;
+ ),
+
+ TP_printk("DB: %016llx, device_id %u, event_id %u, intid %u",
+ __entry->db_addr, __entry->device_id, __entry->event_id,
+ __entry->intid)
+);
+
+TRACE_EVENT(vgic_its_trans_cache_miss, /* fired when vgic_its_check_cache() finds no IRQ for an MSI */
+ TP_PROTO(__u64 db_addr, __u32 device_id, __u32 event_id),
+ TP_ARGS(db_addr, device_id, event_id),
+
+ TP_STRUCT__entry(
+ __field( __u64, db_addr ) /* MSI address: address_hi << 32 | address_lo */
+ __field( __u32, device_id ) /* msi->devid */
+ __field( __u32, event_id ) /* msi->data */
+ ),
+
+ TP_fast_assign(
+ __entry->db_addr = db_addr;
+ __entry->device_id = device_id;
+ __entry->event_id = event_id;
+ ),
+
+ TP_printk("DB: %016llx, device_id %u, event_id %u",
+ __entry->db_addr, __entry->device_id, __entry->event_id)
+);
+
+TRACE_EVENT(vgic_its_trans_cache_victim, /* fired before a cache entry that still holds an IRQ is reused */
+ TP_PROTO(__u64 db_addr, __u32 device_id, __u32 event_id, __u32 intid),
+ TP_ARGS(db_addr, device_id, event_id, intid),
+
+ TP_STRUCT__entry(
+ __field( __u64, db_addr ) /* cte->db of the entry being replaced */
+ __field( __u32, device_id ) /* cte->devid of the entry being replaced */
+ __field( __u32, event_id ) /* cte->eventid of the entry being replaced */
+ __field( __u32, intid ) /* intid of the IRQ the old entry referenced */
+ ),
+
+ TP_fast_assign(
+ __entry->db_addr = db_addr;
+ __entry->device_id = device_id;
+ __entry->event_id = event_id;
+ __entry->intid = intid;
+ ),
+
+ TP_printk("DB: %016llx, device_id %u, event_id %u, intid %u",
+ __entry->db_addr, __entry->device_id, __entry->event_id,
+ __entry->intid)
+);
+
#endif /* _TRACE_VGIC_H */

#undef TRACE_INCLUDE_PATH
diff --git a/arch/arm64/kvm/vgic/vgic-its.c b/arch/arm64/kvm/vgic/vgic-its.c
index e2764d0ffa9f..59179268ac2d 100644
--- a/arch/arm64/kvm/vgic/vgic-its.c
+++ b/arch/arm64/kvm/vgic/vgic-its.c
@@ -20,6 +20,7 @@
#include <asm/kvm_arm.h>
#include <asm/kvm_mmu.h>

+#include "trace.h"
#include "vgic.h"
#include "vgic-mmio.h"

@@ -636,8 +637,11 @@ static void vgic_its_cache_translation(struct kvm *kvm, struct vgic_its *its,
* to the interrupt, so drop the potential reference on what
* was in the cache, and increment it on the new interrupt.
*/
- if (cte->irq)
+ if (cte->irq) {
+ KVM_VM_TRACE_EVENT(kvm, vgic_its_trans_cache_victim, cte->db,
+ cte->devid, cte->eventid, cte->irq->intid);
__vgic_put_lpi_locked(kvm, cte->irq);
+ }

vgic_get_irq_kref(irq);

@@ -767,8 +771,14 @@ int vgic_its_inject_cached_translation(struct kvm *kvm, struct kvm_msi *msi)

db = (u64)msi->address_hi << 32 | msi->address_lo;
irq = vgic_its_check_cache(kvm, db, msi->devid, msi->data);
- if (!irq)
+ if (!irq) {
+ KVM_VM_TRACE_EVENT(kvm, vgic_its_trans_cache_miss, db, msi->devid,
+ msi->data);
return -EWOULDBLOCK;
+ }
+
+ KVM_VM_TRACE_EVENT(kvm, vgic_its_trans_cache_hit, db, msi->devid,
+ msi->data, irq->intid);

raw_spin_lock_irqsave(&irq->irq_lock, flags);
irq->pending_latch = true;
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index 7e7fd25b09b3..846b447b6798 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -1927,6 +1927,10 @@ struct _kvm_stats_desc {
HALT_POLL_HIST_COUNT), \
STATS_DESC_IBOOLEAN(VCPU_GENERIC, blocking)

+#define KVM_VM_TRACE_EVENT(vm, event, ...) \
+ ((vm)->stat.event)++; trace_## event(__VA_ARGS__)
+
+
extern struct dentry *kvm_debugfs_dir;

ssize_t kvm_stats_read(char *id, const struct kvm_stats_header *header,
--
2.43.0.687.g38aa6559b0-goog