[PATCH v3 1/5] KVM: Add arch specific interfaces for sampling guest callchains

From: Tianyi Liu
Date: Sun Dec 10 2023 - 03:13:04 EST


This patch adds two architecture specific interfaces used by `perf kvm`:

- kvm_arch_vcpu_get_unwind_info: Return required data for unwinding
at once; including ip address, frame pointer, whether the guest vCPU
is running in 32 or 64 bits, and possibly the base addresses of
the segments.

- kvm_arch_vcpu_read_virt: Read data from a virtual address of the
guest vm.

`perf_kvm.h` has been added to the `include/linux/` directory to store
the interface structures between the perf events subsystem and the KVM
subsystem.

Since arm64 hasn't provided some foundational infrastructure, stub the
arm64 implementation for now because it's a bit complex.

The above interfaces require architecture support for
`CONFIG_GUEST_PERF_EVENTS`, which is only implemented by x86 and arm64
currently. For more architectures, they need to implement these interfaces
when enabling `CONFIG_GUEST_PERF_EVENTS`.

In terms of safety, guests are designed to be read-only in this feature,
and we will never inject page faults into the guests, ensuring that the
guests are not interfered by profiling. In extremely rare cases, if the
guest is modifying the page table, there is a possibility of reading
incorrect data. Additionally, if certain programs running in the guest OS
do not support frame pointers, it may also result in some erroneous data.
These erroneous data will eventually appear as `[unknown]` entries in the
report. It is sufficient as long as most of the records are correct for
profiling.

Signed-off-by: Tianyi Liu <i.pear@xxxxxxxxxxx>
---
MAINTAINERS | 1 +
arch/arm64/kvm/arm.c | 12 ++++++++++++
arch/x86/kvm/x86.c | 24 ++++++++++++++++++++++++
include/linux/kvm_host.h | 5 +++++
include/linux/perf_kvm.h | 18 ++++++++++++++++++
5 files changed, 60 insertions(+)
create mode 100644 include/linux/perf_kvm.h

diff --git a/MAINTAINERS b/MAINTAINERS
index 788be9ab5b73..5ee36b4a9701 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -16976,6 +16976,7 @@ F: arch/*/kernel/*/perf_event*.c
F: arch/*/kernel/perf_callchain.c
F: arch/*/kernel/perf_event*.c
F: include/linux/perf_event.h
+F: include/linux/perf_kvm.h
F: include/uapi/linux/perf_event.h
F: kernel/events/*
F: tools/lib/perf/
diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c
index e5f75f1f1085..5ae74b5c263a 100644
--- a/arch/arm64/kvm/arm.c
+++ b/arch/arm64/kvm/arm.c
@@ -574,6 +574,18 @@ unsigned long kvm_arch_vcpu_get_ip(struct kvm_vcpu *vcpu)
{
return *vcpu_pc(vcpu);
}
+
+bool kvm_arch_vcpu_get_unwind_info(struct kvm_vcpu *vcpu, struct perf_kvm_guest_unwind_info *info)
+{
+ /* TODO: implement */
+ return false;
+}
+
+bool kvm_arch_vcpu_read_virt(struct kvm_vcpu *vcpu, gva_t addr, void *dest, unsigned int length)
+{
+ /* TODO: implement */
+ return false;
+}
#endif

static int kvm_vcpu_initialized(struct kvm_vcpu *vcpu)
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 2c924075f6f1..9341cd80f665 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -13039,6 +13039,30 @@ unsigned long kvm_arch_vcpu_get_ip(struct kvm_vcpu *vcpu)
return kvm_rip_read(vcpu);
}

+bool kvm_arch_vcpu_get_unwind_info(struct kvm_vcpu *vcpu, struct perf_kvm_guest_unwind_info *info)
+{
+ info->ip_pointer = kvm_rip_read(vcpu);
+ info->frame_pointer = kvm_register_read_raw(vcpu, VCPU_REGS_RBP);
+
+ info->is_guest_64bit = is_64_bit_mode(vcpu);
+ if (info->is_guest_64bit) {
+ info->segment_cs_base = 0;
+ info->segment_ss_base = 0;
+ } else {
+ info->segment_cs_base = get_segment_base(vcpu, VCPU_SREG_CS);
+ info->segment_ss_base = get_segment_base(vcpu, VCPU_SREG_SS);
+ }
+ return true;
+}
+
+bool kvm_arch_vcpu_read_virt(struct kvm_vcpu *vcpu, gva_t addr, void *dest, unsigned int length)
+{
+ struct x86_exception e;
+
+ /* Return true on success */
+ return kvm_read_guest_virt(vcpu, addr, dest, length, &e) == X86EMUL_CONTINUE;
+}
+
int kvm_arch_vcpu_should_kick(struct kvm_vcpu *vcpu)
{
return kvm_vcpu_exiting_guest_mode(vcpu) == IN_GUEST_MODE;
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index 4944136efaa2..6f5ff4209b0c 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -41,6 +41,7 @@
#include <linux/kvm_para.h>

#include <linux/kvm_types.h>
+#include <linux/perf_kvm.h>

#include <asm/kvm_host.h>
#include <linux/kvm_dirty_ring.h>
@@ -1595,6 +1596,10 @@ static inline bool kvm_arch_intc_initialized(struct kvm *kvm)

#ifdef CONFIG_GUEST_PERF_EVENTS
unsigned long kvm_arch_vcpu_get_ip(struct kvm_vcpu *vcpu);
+bool kvm_arch_vcpu_get_unwind_info(struct kvm_vcpu *vcpu,
+ struct perf_kvm_guest_unwind_info *info);
+bool kvm_arch_vcpu_read_virt(struct kvm_vcpu *vcpu, gva_t addr, void *dest,
+ unsigned int length);

void kvm_register_perf_callbacks(unsigned int (*pt_intr_handler)(void));
void kvm_unregister_perf_callbacks(void);
diff --git a/include/linux/perf_kvm.h b/include/linux/perf_kvm.h
new file mode 100644
index 000000000000..e77eeebddabb
--- /dev/null
+++ b/include/linux/perf_kvm.h
@@ -0,0 +1,18 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _LINUX_PERF_KVM_H
+#define _LINUX_PERF_KVM_H
+
+/*
+ * Structures as interface between Perf Event and KVM subsystem.
+ * Add more members for new architectures if necessary.
+ */
+
+struct perf_kvm_guest_unwind_info {
+ unsigned long ip_pointer;
+ unsigned long frame_pointer;
+ bool is_guest_64bit;
+ unsigned long segment_cs_base;
+ unsigned long segment_ss_base;
+};
+
+#endif /* _LINUX_PERF_KVM_H */
--
2.34.1