Re: [PATCH v12 09/22] x86/virt/tdx: Use all system memory when initializing TDX module as TDX memory

From: David Hildenbrand
Date: Tue Jul 11 2023 - 07:39:06 EST




[...]

+/* All TDX-usable memory regions. Protected by mem_hotplug_lock. */
+static LIST_HEAD(tdx_memlist);
+
/*
* Wrapper of __seamcall() to convert SEAMCALL leaf function error code
* to kernel error code. @seamcall_ret and @out contain the SEAMCALL
@@ -204,6 +214,79 @@ static int tdx_get_sysinfo(struct tdsysinfo_struct *sysinfo,
return 0;
}
+/*
+ * Add a memory region as a TDX memory block. The caller must make sure
+ * all memory regions are added in address ascending order and don't
+ * overlap.
+ */
+static int add_tdx_memblock(struct list_head *tmb_list, unsigned long start_pfn,
+ unsigned long end_pfn)
+{
+ struct tdx_memblock *tmb;
+
+ tmb = kmalloc(sizeof(*tmb), GFP_KERNEL);
+ if (!tmb)
+ return -ENOMEM;
+
+ INIT_LIST_HEAD(&tmb->list);
+ tmb->start_pfn = start_pfn;
+ tmb->end_pfn = end_pfn;
+
+ /* @tmb_list is protected by mem_hotplug_lock */

If the list is static and independent of memory hotplug, why does it have to be protected?

I assume because the memory notifier might currently trigger before building the list.

Not sure if that is the right approach. See below.

+ list_add_tail(&tmb->list, tmb_list);
+ return 0;
+}
+
+static void free_tdx_memlist(struct list_head *tmb_list)
+{
+ /* @tmb_list is protected by mem_hotplug_lock */
+ while (!list_empty(tmb_list)) {
+ struct tdx_memblock *tmb = list_first_entry(tmb_list,
+ struct tdx_memblock, list);
+
+ list_del(&tmb->list);
+ kfree(tmb);
+ }
+}
+
+/*
+ * Ensure that all memblock memory regions are convertible to TDX
+ * memory. Once this has been established, stash the memblock
+ * ranges off in a secondary structure because memblock is modified
+ * in memory hotplug while TDX memory regions are fixed.
+ */
+static int build_tdx_memlist(struct list_head *tmb_list)
+{
+ unsigned long start_pfn, end_pfn;
+ int i, ret;
+
+ for_each_mem_pfn_range(i, MAX_NUMNODES, &start_pfn, &end_pfn, NULL) {
+ /*
+ * The first 1MB is not reported as TDX convertible memory.
+ * Although the first 1MB is always reserved and won't end up
+ * to the page allocator, it is still in memblock's memory
+ * regions. Skip them manually to exclude them as TDX memory.
+ */
+ start_pfn = max(start_pfn, PHYS_PFN(SZ_1M));
+ if (start_pfn >= end_pfn)
+ continue;
+
+ /*
+ * Add the memory regions as TDX memory. The regions in
+ * memblock has already guaranteed they are in address
+ * ascending order and don't overlap.
+ */
+ ret = add_tdx_memblock(tmb_list, start_pfn, end_pfn);
+ if (ret)
+ goto err;
+ }

So at the time init_tdx_module() is called, you simply go over all memblocks.

But how can you be sure that they are TDX-capable?

While the memory notifier will deny onlining new memory blocks, add_memory() already happened and added a new memory block to the system (and to memblock). See add_memory_resource().

It might be cleaner to build the list once during module init (before any memory hotplug can happen and before we tear down memblock) and not require ARCH_KEEP_MEMBLOCK. Essentially, before registering the notifier. So the list is really static.

But maybe I am missing something.

+
+ return 0;
+err:
+ free_tdx_memlist(tmb_list);
+ return ret;
+}
+
static int init_tdx_module(void)
{
struct tdsysinfo_struct *sysinfo;
@@ -230,10 +313,25 @@ static int init_tdx_module(void)
if (ret)
goto out;

[...]

+struct tdx_memblock {
+ struct list_head list;
+ unsigned long start_pfn;
+ unsigned long end_pfn;
+};

If it's never consumed by someone else, maybe keep it local to the c file?

+
struct tdx_module_output;
u64 __seamcall(u64 fn, u64 rcx, u64 rdx, u64 r8, u64 r9,
struct tdx_module_output *out);

--
Cheers,

David / dhildenb