[PATCH v6 10/17] KVM: arm64: Implement non-protected nVHE hyp stack unwinder

From: Kalesh Singh
Date: Tue Jul 26 2022 - 03:40:31 EST


Implements the common framework necessary for unwind() to work
for non-protected nVHE mode:
- on_accessible_stack()
- on_overflow_stack()
- unwind_next()

Non-protected nVHE unwind() is used to unwind and dump the hypervisor
stacktrace by the host in EL1

Signed-off-by: Kalesh Singh <kaleshsingh@xxxxxxxxxx>
Reviewed-by: Fuad Tabba <tabba@xxxxxxxxxx>
Tested-by: Fuad Tabba <tabba@xxxxxxxxxx>
---

Changes in v6:
- Add Fuad’s Reviewed-by and Tested-by tags

Changes in v5:
- Use regular comments instead of doc comments, per Fuad

arch/arm64/include/asm/stacktrace/common.h | 2 +
arch/arm64/include/asm/stacktrace/nvhe.h | 76 +++++++++++++++++++++-
arch/arm64/kvm/arm.c | 2 +-
3 files changed, 77 insertions(+), 3 deletions(-)

diff --git a/arch/arm64/include/asm/stacktrace/common.h b/arch/arm64/include/asm/stacktrace/common.h
index 45474b383630..3ebb69ea374a 100644
--- a/arch/arm64/include/asm/stacktrace/common.h
+++ b/arch/arm64/include/asm/stacktrace/common.h
@@ -34,6 +34,7 @@ enum stack_type {
STACK_TYPE_OVERFLOW,
STACK_TYPE_SDEI_NORMAL,
STACK_TYPE_SDEI_CRITICAL,
+ STACK_TYPE_HYP,
__NR_STACK_TYPES
};

@@ -186,6 +187,7 @@ static inline int unwind_next_common(struct unwind_state *state,
*
* TASK -> IRQ -> OVERFLOW -> SDEI_NORMAL
* TASK -> SDEI_NORMAL -> SDEI_CRITICAL -> OVERFLOW
+ * HYP -> OVERFLOW
*
* ... but the nesting itself is strict. Once we transition from one
* stack to another, it's never valid to unwind back to that first
diff --git a/arch/arm64/include/asm/stacktrace/nvhe.h b/arch/arm64/include/asm/stacktrace/nvhe.h
index 1192ae0f80c1..21082fd4a0b7 100644
--- a/arch/arm64/include/asm/stacktrace/nvhe.h
+++ b/arch/arm64/include/asm/stacktrace/nvhe.h
@@ -16,10 +16,19 @@

#include <asm/stacktrace/common.h>

+static inline bool on_hyp_stack(unsigned long sp, unsigned long size,
+ struct stack_info *info);
+
static inline bool on_accessible_stack(const struct task_struct *tsk,
unsigned long sp, unsigned long size,
struct stack_info *info)
{
+ if (on_accessible_stack_common(tsk, sp, size, info))
+ return true;
+
+ if (on_hyp_stack(sp, size, info))
+ return true;
+
return false;
}

@@ -31,15 +40,78 @@ static inline bool on_accessible_stack(const struct task_struct *tsk,
* (by the host in EL1).
*/

+DECLARE_KVM_NVHE_PER_CPU(unsigned long [OVERFLOW_STACK_SIZE/sizeof(long)], overflow_stack);
+DECLARE_KVM_NVHE_PER_CPU(struct kvm_nvhe_stacktrace_info, kvm_stacktrace_info);
+DECLARE_PER_CPU(unsigned long, kvm_arm_hyp_stack_page);
+
+/*
+ * kvm_nvhe_stack_kern_va - Convert KVM nVHE HYP stack addresses to a kernel VAs
+ *
+ * The nVHE hypervisor stack is mapped in the flexible 'private' VA range, to
+ * allow for guard pages below the stack. Consequently, the fixed offset address
+ * translation macros won't work here.
+ *
+ * The kernel VA is calculated as an offset from the kernel VA of the hypervisor
+ * stack base.
+ *
+ * Returns true on success and updates @addr to its corresponding kernel VA;
+ * otherwise returns false.
+ */
+static inline bool kvm_nvhe_stack_kern_va(unsigned long *addr,
+ enum stack_type type)
+{
+ struct kvm_nvhe_stacktrace_info *stacktrace_info;
+ unsigned long hyp_base, kern_base, hyp_offset;
+
+ stacktrace_info = this_cpu_ptr_nvhe_sym(kvm_stacktrace_info);
+
+ switch (type) {
+ case STACK_TYPE_HYP:
+ kern_base = (unsigned long)*this_cpu_ptr(&kvm_arm_hyp_stack_page);
+ hyp_base = (unsigned long)stacktrace_info->stack_base;
+ break;
+ case STACK_TYPE_OVERFLOW:
+ kern_base = (unsigned long)this_cpu_ptr_nvhe_sym(overflow_stack);
+ hyp_base = (unsigned long)stacktrace_info->overflow_stack_base;
+ break;
+ default:
+ return false;
+ }
+
+ hyp_offset = *addr - hyp_base;
+
+ *addr = kern_base + hyp_offset;
+
+ return true;
+}
+
static inline bool on_overflow_stack(unsigned long sp, unsigned long size,
struct stack_info *info)
{
- return false;
+ struct kvm_nvhe_stacktrace_info *stacktrace_info
+ = this_cpu_ptr_nvhe_sym(kvm_stacktrace_info);
+ unsigned long low = (unsigned long)stacktrace_info->overflow_stack_base;
+ unsigned long high = low + OVERFLOW_STACK_SIZE;
+
+ return on_stack(sp, size, low, high, STACK_TYPE_OVERFLOW, info);
+}
+
+static inline bool on_hyp_stack(unsigned long sp, unsigned long size,
+ struct stack_info *info)
+{
+ struct kvm_nvhe_stacktrace_info *stacktrace_info
+ = this_cpu_ptr_nvhe_sym(kvm_stacktrace_info);
+ unsigned long low = (unsigned long)stacktrace_info->stack_base;
+ unsigned long high = low + PAGE_SIZE;
+
+ return on_stack(sp, size, low, high, STACK_TYPE_HYP, info);
}

static inline int notrace unwind_next(struct unwind_state *state)
{
- return 0;
+ struct stack_info info;
+
+ return unwind_next_common(state, &info, kvm_nvhe_stack_kern_va);
}
NOKPROBE_SYMBOL(unwind_next);

diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c
index a0188144a122..6a64293108c5 100644
--- a/arch/arm64/kvm/arm.c
+++ b/arch/arm64/kvm/arm.c
@@ -49,7 +49,7 @@ DEFINE_STATIC_KEY_FALSE(kvm_protected_mode_initialized);

DECLARE_KVM_HYP_PER_CPU(unsigned long, kvm_hyp_vector);

-static DEFINE_PER_CPU(unsigned long, kvm_arm_hyp_stack_page);
+DEFINE_PER_CPU(unsigned long, kvm_arm_hyp_stack_page);
unsigned long kvm_arm_hyp_percpu_base[NR_CPUS];
DECLARE_KVM_NVHE_PER_CPU(struct kvm_nvhe_init_params, kvm_init_params);

--
2.37.1.359.gd136c6c3e2-goog