[PATCHv8 14/14] x86/tdx: Add unaccepted memory support

From: Kirill A. Shutemov
Date: Tue Dec 06 2022 - 20:51:18 EST


Hookup TDX-specific code to accept memory.

Accepting the memory is the same process as converting memory from
shared to private: kernel notifies VMM with MAP_GPA hypercall and then
accept pages with ACCEPT_PAGE module call.

The implementation in core kernel uses tdx_enc_status_changed(). It
already used for converting memory to shared and back for I/O
transactions.

Boot stub provides own implementation of tdx_accept_memory(). It is
similar in structure to tdx_enc_status_changed(), but only cares about
converting memory to private.

Signed-off-by: Kirill A. Shutemov <kirill.shutemov@xxxxxxxxxxxxxxx>
---
arch/x86/Kconfig | 2 +
arch/x86/boot/compressed/Makefile | 2 +-
arch/x86/boot/compressed/error.c | 19 ++++++
arch/x86/boot/compressed/error.h | 1 +
arch/x86/boot/compressed/mem.c | 33 +++++++++-
arch/x86/boot/compressed/tdx-shared.c | 2 +
arch/x86/boot/compressed/tdx.c | 39 +++++++++++
arch/x86/coco/tdx/Makefile | 2 +-
arch/x86/coco/tdx/tdx-shared.c | 95 +++++++++++++++++++++++++++
arch/x86/coco/tdx/tdx.c | 86 +-----------------------
arch/x86/include/asm/shared/tdx.h | 2 +
arch/x86/include/asm/tdx.h | 2 +
arch/x86/mm/unaccepted_memory.c | 9 ++-
13 files changed, 206 insertions(+), 88 deletions(-)
create mode 100644 arch/x86/boot/compressed/tdx-shared.c
create mode 100644 arch/x86/coco/tdx/tdx-shared.c

diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 947937d327c6..bc031fd1d95e 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -888,6 +888,8 @@ config INTEL_TDX_GUEST
select ARCH_HAS_CC_PLATFORM
select X86_MEM_ENCRYPT
select X86_MCE
+ select UNACCEPTED_MEMORY
+ select EFI_STUB
help
Support running as a guest under Intel TDX. Without this support,
the guest kernel can not boot or run under TDX.
diff --git a/arch/x86/boot/compressed/Makefile b/arch/x86/boot/compressed/Makefile
index a9937b50c37f..38c42c2b4349 100644
--- a/arch/x86/boot/compressed/Makefile
+++ b/arch/x86/boot/compressed/Makefile
@@ -106,7 +106,7 @@ ifdef CONFIG_X86_64
endif

vmlinux-objs-$(CONFIG_ACPI) += $(obj)/acpi.o
-vmlinux-objs-$(CONFIG_INTEL_TDX_GUEST) += $(obj)/tdx.o $(obj)/tdcall.o
+vmlinux-objs-$(CONFIG_INTEL_TDX_GUEST) += $(obj)/tdx.o $(obj)/tdx-shared.o $(obj)/tdcall.o
vmlinux-objs-$(CONFIG_UNACCEPTED_MEMORY) += $(obj)/bitmap.o $(obj)/find.o $(obj)/mem.o

vmlinux-objs-$(CONFIG_EFI) += $(obj)/efi.o
diff --git a/arch/x86/boot/compressed/error.c b/arch/x86/boot/compressed/error.c
index c881878e56d3..5313c5cb2b80 100644
--- a/arch/x86/boot/compressed/error.c
+++ b/arch/x86/boot/compressed/error.c
@@ -22,3 +22,22 @@ void error(char *m)
while (1)
asm("hlt");
}
+
+/* EFI libstub provides vsnprintf() */
+#ifdef CONFIG_EFI_STUB
+void panic(const char *fmt, ...)
+{
+ static char buf[1024];
+ va_list args;
+ int len;
+
+ va_start(args, fmt);
+ len = vsnprintf(buf, sizeof(buf), fmt, args);
+ va_end(args);
+
+ if (len && buf[len - 1] == '\n')
+ buf[len - 1] = '\0';
+
+ error(buf);
+}
+#endif
diff --git a/arch/x86/boot/compressed/error.h b/arch/x86/boot/compressed/error.h
index 1de5821184f1..86fe33b93715 100644
--- a/arch/x86/boot/compressed/error.h
+++ b/arch/x86/boot/compressed/error.h
@@ -6,5 +6,6 @@

void warn(char *m);
void error(char *m) __noreturn;
+void panic(const char *fmt, ...) __noreturn __cold;

#endif /* BOOT_COMPRESSED_ERROR_H */
diff --git a/arch/x86/boot/compressed/mem.c b/arch/x86/boot/compressed/mem.c
index 626e4f10ba2c..23a84c46aa9b 100644
--- a/arch/x86/boot/compressed/mem.c
+++ b/arch/x86/boot/compressed/mem.c
@@ -5,6 +5,8 @@
#include "error.h"
#include "find.h"
#include "math.h"
+#include "tdx.h"
+#include <asm/shared/tdx.h>

#define PMD_SHIFT 21
#define PMD_SIZE (_AC(1, UL) << PMD_SHIFT)
@@ -12,10 +14,39 @@

extern struct boot_params *boot_params;

+/*
+ * accept_memory() and process_unaccepted_memory() called from EFI stub which
+ * runs before decompresser and its early_tdx_detect().
+ *
+ * Enumerate TDX directly from the early users.
+ */
+static bool early_is_tdx_guest(void)
+{
+ static bool once;
+ static bool is_tdx;
+
+ if (!IS_ENABLED(CONFIG_INTEL_TDX_GUEST))
+ return false;
+
+ if (!once) {
+ u32 eax, sig[3];
+
+ cpuid_count(TDX_CPUID_LEAF_ID, 0, &eax,
+ &sig[0], &sig[2], &sig[1]);
+ is_tdx = !memcmp(TDX_IDENT, sig, sizeof(sig));
+ once = true;
+ }
+
+ return is_tdx;
+}
+
static inline void __accept_memory(phys_addr_t start, phys_addr_t end)
{
/* Platform-specific memory-acceptance call goes here */
- error("Cannot accept memory");
+ if (early_is_tdx_guest())
+ tdx_accept_memory(start, end);
+ else
+ error("Cannot accept memory: unknown platform\n");
}

/*
diff --git a/arch/x86/boot/compressed/tdx-shared.c b/arch/x86/boot/compressed/tdx-shared.c
new file mode 100644
index 000000000000..5ac43762fe13
--- /dev/null
+++ b/arch/x86/boot/compressed/tdx-shared.c
@@ -0,0 +1,2 @@
+#include "error.h"
+#include "../../coco/tdx/tdx-shared.c"
diff --git a/arch/x86/boot/compressed/tdx.c b/arch/x86/boot/compressed/tdx.c
index 918a7606f53c..de1d4a87418d 100644
--- a/arch/x86/boot/compressed/tdx.c
+++ b/arch/x86/boot/compressed/tdx.c
@@ -3,12 +3,17 @@
#include "../cpuflags.h"
#include "../string.h"
#include "../io.h"
+#include "align.h"
#include "error.h"
+#include "pgtable_types.h"

#include <vdso/limits.h>
#include <uapi/asm/vmx.h>

#include <asm/shared/tdx.h>
+#include <asm/page_types.h>
+
+static u64 cc_mask;

/* Called from __tdx_hypercall() for unrecoverable failure */
void __tdx_hypercall_failed(void)
@@ -16,6 +21,38 @@ void __tdx_hypercall_failed(void)
error("TDVMCALL failed. TDX module bug?");
}

+static u64 get_cc_mask(void)
+{
+ struct tdx_module_output out;
+ unsigned int gpa_width;
+
+ /*
+ * TDINFO TDX module call is used to get the TD execution environment
+ * information like GPA width, number of available vcpus, debug mode
+ * information, etc. More details about the ABI can be found in TDX
+ * Guest-Host-Communication Interface (GHCI), section 2.4.2 TDCALL
+ * [TDG.VP.INFO].
+ *
+ * The GPA width that comes out of this call is critical. TDX guests
+ * can not meaningfully run without it.
+ */
+ if (__tdx_module_call(TDX_GET_INFO, 0, 0, 0, 0, &out))
+ error("TDCALL GET_INFO failed (Buggy TDX module!)\n");
+
+ gpa_width = out.rcx & GENMASK(5, 0);
+
+ /*
+ * The highest bit of a guest physical address is the "sharing" bit.
+ * Set it for shared pages and clear it for private pages.
+ */
+ return BIT_ULL(gpa_width - 1);
+}
+
+u64 cc_mkdec(u64 val)
+{
+ return val & ~cc_mask;
+}
+
static inline unsigned int tdx_io_in(int size, u16 port)
{
struct tdx_hypercall_args args = {
@@ -70,6 +107,8 @@ void early_tdx_detect(void)
if (memcmp(TDX_IDENT, sig, sizeof(sig)))
return;

+ cc_mask = get_cc_mask();
+
/* Use hypercalls instead of I/O instructions */
pio_ops.f_inb = tdx_inb;
pio_ops.f_outb = tdx_outb;
diff --git a/arch/x86/coco/tdx/Makefile b/arch/x86/coco/tdx/Makefile
index 46c55998557d..2c7dcbf1458b 100644
--- a/arch/x86/coco/tdx/Makefile
+++ b/arch/x86/coco/tdx/Makefile
@@ -1,3 +1,3 @@
# SPDX-License-Identifier: GPL-2.0

-obj-y += tdx.o tdcall.o
+obj-y += tdx.o tdx-shared.o tdcall.o
diff --git a/arch/x86/coco/tdx/tdx-shared.c b/arch/x86/coco/tdx/tdx-shared.c
new file mode 100644
index 000000000000..ee74f7bbe806
--- /dev/null
+++ b/arch/x86/coco/tdx/tdx-shared.c
@@ -0,0 +1,95 @@
+#include <asm/tdx.h>
+#include <asm/pgtable.h>
+
+static unsigned long try_accept_one(phys_addr_t start, unsigned long len,
+ enum pg_level pg_level)
+{
+ unsigned long accept_size = page_level_size(pg_level);
+ u64 tdcall_rcx;
+ u8 page_size;
+
+ if (!IS_ALIGNED(start, accept_size))
+ return 0;
+
+ if (len < accept_size)
+ return 0;
+
+ /*
+ * Pass the page physical address to the TDX module to accept the
+ * pending, private page.
+ *
+ * Bits 2:0 of RCX encode page size: 0 - 4K, 1 - 2M, 2 - 1G.
+ */
+ switch (pg_level) {
+ case PG_LEVEL_4K:
+ page_size = 0;
+ break;
+ case PG_LEVEL_2M:
+ page_size = 1;
+ break;
+ case PG_LEVEL_1G:
+ page_size = 2;
+ break;
+ default:
+ return 0;
+ }
+
+ tdcall_rcx = start | page_size;
+ if (__tdx_module_call(TDX_ACCEPT_PAGE, tdcall_rcx, 0, 0, 0, NULL))
+ return 0;
+
+ return accept_size;
+}
+
+bool tdx_enc_status_changed_phys(phys_addr_t start, phys_addr_t end, bool enc)
+{
+ if (!enc) {
+ /* Set the shared (decrypted) bits: */
+ start |= cc_mkdec(0);
+ end |= cc_mkdec(0);
+ }
+
+ /*
+ * Notify the VMM about page mapping conversion. More info about ABI
+ * can be found in TDX Guest-Host-Communication Interface (GHCI),
+ * section "TDG.VP.VMCALL<MapGPA>"
+ */
+ if (_tdx_hypercall(TDVMCALL_MAP_GPA, start, end - start, 0, 0))
+ return false;
+
+ /* private->shared conversion requires only MapGPA call */
+ if (!enc)
+ return true;
+
+ /*
+ * For shared->private conversion, accept the page using
+ * TDX_ACCEPT_PAGE TDX module call.
+ */
+ while (start < end) {
+ unsigned long len = end - start;
+ unsigned long accept_size;
+
+ /*
+ * Try larger accepts first. It gives chance to VMM to keep
+ * 1G/2M Secure EPT entries where possible and speeds up
+ * process by cutting number of hypercalls (if successful).
+ */
+
+ accept_size = try_accept_one(start, len, PG_LEVEL_1G);
+ if (!accept_size)
+ accept_size = try_accept_one(start, len, PG_LEVEL_2M);
+ if (!accept_size)
+ accept_size = try_accept_one(start, len, PG_LEVEL_4K);
+ if (!accept_size)
+ return false;
+ start += accept_size;
+ }
+
+ return true;
+}
+
+void tdx_accept_memory(phys_addr_t start, phys_addr_t end)
+{
+ if (!tdx_enc_status_changed_phys(start, end, true))
+ panic("Accepting memory failed: %#llx-%#llx\n", start, end);
+}
diff --git a/arch/x86/coco/tdx/tdx.c b/arch/x86/coco/tdx/tdx.c
index cf6d9a0968d8..0aa95fe29045 100644
--- a/arch/x86/coco/tdx/tdx.c
+++ b/arch/x86/coco/tdx/tdx.c
@@ -674,46 +674,6 @@ static bool tdx_cache_flush_required(void)
return true;
}

-static unsigned long try_accept_one(phys_addr_t start, unsigned long len,
- enum pg_level pg_level)
-{
- unsigned long accept_size = page_level_size(pg_level);
- u64 tdcall_rcx;
- u8 page_size;
-
- if (!IS_ALIGNED(start, accept_size))
- return 0;
-
- if (len < accept_size)
- return 0;
-
- /*
- * Pass the page physical address to the TDX module to accept the
- * pending, private page.
- *
- * Bits 2:0 of RCX encode page size: 0 - 4K, 1 - 2M, 2 - 1G.
- */
- switch (pg_level) {
- case PG_LEVEL_4K:
- page_size = 0;
- break;
- case PG_LEVEL_2M:
- page_size = 1;
- break;
- case PG_LEVEL_1G:
- page_size = 2;
- break;
- default:
- return 0;
- }
-
- tdcall_rcx = start | page_size;
- if (__tdx_module_call(TDX_ACCEPT_PAGE, tdcall_rcx, 0, 0, 0, NULL))
- return 0;
-
- return accept_size;
-}
-
/*
* Inform the VMM of the guest's intent for this physical page: shared with
* the VMM or private to the guest. The VMM is expected to change its mapping
@@ -722,51 +682,9 @@ static unsigned long try_accept_one(phys_addr_t start, unsigned long len,
static bool tdx_enc_status_changed(unsigned long vaddr, int numpages, bool enc)
{
phys_addr_t start = __pa(vaddr);
- phys_addr_t end = __pa(vaddr + numpages * PAGE_SIZE);
-
- if (!enc) {
- /* Set the shared (decrypted) bits: */
- start |= cc_mkdec(0);
- end |= cc_mkdec(0);
- }
-
- /*
- * Notify the VMM about page mapping conversion. More info about ABI
- * can be found in TDX Guest-Host-Communication Interface (GHCI),
- * section "TDG.VP.VMCALL<MapGPA>"
- */
- if (_tdx_hypercall(TDVMCALL_MAP_GPA, start, end - start, 0, 0))
- return false;
-
- /* private->shared conversion requires only MapGPA call */
- if (!enc)
- return true;
+ phys_addr_t end = __pa(vaddr + numpages * PAGE_SIZE);

- /*
- * For shared->private conversion, accept the page using
- * TDX_ACCEPT_PAGE TDX module call.
- */
- while (start < end) {
- unsigned long len = end - start;
- unsigned long accept_size;
-
- /*
- * Try larger accepts first. It gives chance to VMM to keep
- * 1G/2M Secure EPT entries where possible and speeds up
- * process by cutting number of hypercalls (if successful).
- */
-
- accept_size = try_accept_one(start, len, PG_LEVEL_1G);
- if (!accept_size)
- accept_size = try_accept_one(start, len, PG_LEVEL_2M);
- if (!accept_size)
- accept_size = try_accept_one(start, len, PG_LEVEL_4K);
- if (!accept_size)
- return false;
- start += accept_size;
- }
-
- return true;
+ return tdx_enc_status_changed_phys(start, end, enc);
}

void __init tdx_early_init(void)
diff --git a/arch/x86/include/asm/shared/tdx.h b/arch/x86/include/asm/shared/tdx.h
index c5f12b90ef70..a21685fd7c4f 100644
--- a/arch/x86/include/asm/shared/tdx.h
+++ b/arch/x86/include/asm/shared/tdx.h
@@ -82,5 +82,7 @@ struct tdx_module_output {
u64 __tdx_module_call(u64 fn, u64 rcx, u64 rdx, u64 r8, u64 r9,
struct tdx_module_output *out);

+void tdx_accept_memory(phys_addr_t start, phys_addr_t end);
+
#endif /* !__ASSEMBLY__ */
#endif /* _ASM_X86_SHARED_TDX_H */
diff --git a/arch/x86/include/asm/tdx.h b/arch/x86/include/asm/tdx.h
index 234197ec17e4..3a7340ad9a4b 100644
--- a/arch/x86/include/asm/tdx.h
+++ b/arch/x86/include/asm/tdx.h
@@ -50,6 +50,8 @@ bool tdx_early_handle_ve(struct pt_regs *regs);

int tdx_mcall_get_report0(u8 *reportdata, u8 *tdreport);

+bool tdx_enc_status_changed_phys(phys_addr_t start, phys_addr_t end, bool enc);
+
#else

static inline void tdx_early_init(void) { };
diff --git a/arch/x86/mm/unaccepted_memory.c b/arch/x86/mm/unaccepted_memory.c
index 1745e6a65024..45706c684ca5 100644
--- a/arch/x86/mm/unaccepted_memory.c
+++ b/arch/x86/mm/unaccepted_memory.c
@@ -7,6 +7,7 @@

#include <asm/io.h>
#include <asm/setup.h>
+#include <asm/shared/tdx.h>
#include <asm/unaccepted_memory.h>

/* Protects unaccepted memory bitmap */
@@ -62,7 +63,13 @@ void accept_memory(phys_addr_t start, phys_addr_t end)
unsigned long len = range_end - range_start;

/* Platform-specific memory-acceptance call goes here */
- panic("Cannot accept memory: unknown platform\n");
+ if (cpu_feature_enabled(X86_FEATURE_TDX_GUEST)) {
+ tdx_accept_memory(range_start * PMD_SIZE,
+ range_end * PMD_SIZE);
+ } else {
+ panic("Cannot accept memory: unknown platform\n");
+ }
+
bitmap_clear(bitmap, range_start, len);
}
spin_unlock_irqrestore(&unaccepted_memory_lock, flags);
--
2.38.0