[PATCH v4 1/1] swiotlb: Track and report io_tlb_used high water marks in debugfs

From: Michael Kelley
Date: Thu Apr 13 2023 - 13:58:04 EST


swiotlb currently reports the total number of slabs and the instantaneous
in-use slabs in debugfs. But with increased usage of swiotlb for all I/O
in Confidential Computing (coco) VMs, it has become difficult to know
how much memory to allocate for swiotlb bounce buffers, either via the
automatic algorithm in the kernel or by specifying a value on the
kernel boot line. The current automatic algorithm generously allocates
swiotlb bounce buffer memory, and may be wasting significant memory in
many use cases.

To support better understanding of swiotlb usage, add tracking of the
high water mark for usage of the default swiotlb bounce buffer memory
pool and any reserved memory pools. Report these high water marks in
debugfs along with the other swiotlb pool metrics. Allow the high water
marks to be reset to zero at runtime by writing to them.

Signed-off-by: Michael Kelley <mikelley@xxxxxxxxxxxxx>
---
This patch is built assuming my previous patch is in place to fix
io_tlb_used reporting for reserved memory pools.

Changes in v4:
* Break out high water mark accounting into separate functions with
stubs when CONFIG_DEBUG_FS=n. [Christoph Hellwig]

* Do high water mark accounting for each reserved memory pool as well
as the default pool. Move the accounting variables into struct
io_tlb_mem to support this. [Petr Tesarik]

Changes in v3:
* Do high water mark accounting only when CONFIG_DEBUG_FS=y. As
a result, add back the mem_used() function for the "swiotlb
buffer is full" error message. [Christoph -- I didn't hear back
whether this approach addresses your concern about one additional
atomic operation when slots are allocated and again when freed. I've
gone ahead with this new version, and we can obviously have further
discussion.]

* Remove unnecessary u64 casts. [Christoph Hellwig]

* Track slot usage and the high water mark only for io_tlb_default_mem.
Previous versions incorrectly included per-device pools. [Petr Tesarik]

Changes in v2:
* Only reset the high water mark to zero when the specified new value
is zero, to prevent confusion about the ability to reset to some
other value [Dexuan Cui]

include/linux/swiotlb.h | 7 ++++++
kernel/dma/swiotlb.c | 66 +++++++++++++++++++++++++++++++++++++++++++++++++
2 files changed, 73 insertions(+)

diff --git a/include/linux/swiotlb.h b/include/linux/swiotlb.h
index bcef10e..6dc4598 100644
--- a/include/linux/swiotlb.h
+++ b/include/linux/swiotlb.h
@@ -87,6 +87,11 @@ dma_addr_t swiotlb_map(struct device *dev, phys_addr_t phys,
* @for_alloc: %true if the pool is used for memory allocation
* @nareas: The area number in the pool.
* @area_nslabs: The slot number in the area.
+ * @total_used: The total number of slots in the pool that are currently used
+ * across all areas. Used only for calculating used_hiwater in
+ * debugfs.
+ * @used_hiwater: The high water mark for total_used. Used only for reporting
+ * in debugfs.
*/
struct io_tlb_mem {
phys_addr_t start;
@@ -102,6 +107,8 @@ struct io_tlb_mem {
unsigned int area_nslabs;
struct io_tlb_area *areas;
struct io_tlb_slot *slots;
+ atomic_long_t total_used;
+ atomic_long_t used_hiwater;
};
extern struct io_tlb_mem io_tlb_default_mem;

diff --git a/kernel/dma/swiotlb.c b/kernel/dma/swiotlb.c
index db43de82..e498d75 100644
--- a/kernel/dma/swiotlb.c
+++ b/kernel/dma/swiotlb.c
@@ -608,6 +608,39 @@ static unsigned int wrap_area_index(struct io_tlb_mem *mem, unsigned int index)
return index;
}

+#ifdef CONFIG_DEBUG_FS
+/*
+ * Track the total used slots with a per-pool atomic value in order to have
+ * correct information to determine the high water mark. The mem_used()
+ * function gives imprecise results because there's no locking across
+ * multiple areas.
+ */
+
+static void inc_used_and_hiwater(struct io_tlb_mem *mem, unsigned int nslots)
+{
+ unsigned long old_hiwater, new_used;
+
+ new_used = atomic_long_add_return(nslots, &mem->total_used);
+ old_hiwater = atomic_long_read(&mem->used_hiwater);
+ do {
+ if (new_used <= old_hiwater)
+ break;
+ } while (!atomic_long_try_cmpxchg(&mem->used_hiwater,
+ &old_hiwater, new_used));
+}
+
+static void dec_used(struct io_tlb_mem *mem, unsigned int nslots)
+{
+ atomic_long_sub(nslots, &mem->total_used);
+}
+
+#else /* !CONFIG_DEBUG_FS */
+
+static void inc_used_and_hiwater(struct io_tlb_mem *mem, unsigned int nslots) {}
+static void dec_used(struct io_tlb_mem *mem, unsigned int nslots) {}
+
+#endif /*CONFIG_DEBUG_FS */
+
/*
* Find a suitable number of IO TLB entries size that will fit this request and
* allocate a buffer from that IO TLB pool.
@@ -702,6 +735,9 @@ static int swiotlb_do_find_slots(struct device *dev, int area_index,
area->index = wrap_area_index(mem, index + nslots);
area->used += nslots;
spin_unlock_irqrestore(&area->lock, flags);
+
+ inc_used_and_hiwater(mem, nslots);
+
return slot_index;
}

@@ -834,6 +870,8 @@ static void swiotlb_release_slots(struct device *dev, phys_addr_t tlb_addr)
mem->slots[i].list = ++count;
area->used -= nslots;
spin_unlock_irqrestore(&area->lock, flags);
+
+ dec_used(mem, nslots);
}

/*
@@ -935,11 +973,37 @@ static int io_tlb_used_get(void *data, u64 *val)
*val = mem_used(mem);
return 0;
}
+
+static int io_tlb_hiwater_get(void *data, u64 *val)
+{
+ struct io_tlb_mem *mem = data;
+
+ *val = atomic_long_read(&mem->used_hiwater);
+ return 0;
+}
+
+static int io_tlb_hiwater_set(void *data, u64 val)
+{
+ struct io_tlb_mem *mem = data;
+
+ /* Only allow setting to zero */
+ if (val != 0)
+ return -EINVAL;
+
+ atomic_long_set(&mem->used_hiwater, val);
+ return 0;
+}
+
DEFINE_DEBUGFS_ATTRIBUTE(fops_io_tlb_used, io_tlb_used_get, NULL, "%llu\n");
+DEFINE_DEBUGFS_ATTRIBUTE(fops_io_tlb_hiwater, io_tlb_hiwater_get,
+ io_tlb_hiwater_set, "%llu\n");

static void swiotlb_create_debugfs_files(struct io_tlb_mem *mem,
const char *dirname)
{
+ atomic_long_set(&mem->total_used, 0);
+ atomic_long_set(&mem->used_hiwater, 0);
+
mem->debugfs = debugfs_create_dir(dirname, io_tlb_default_mem.debugfs);
if (!mem->nslabs)
return;
@@ -947,6 +1011,8 @@ static void swiotlb_create_debugfs_files(struct io_tlb_mem *mem,
debugfs_create_ulong("io_tlb_nslabs", 0400, mem->debugfs, &mem->nslabs);
debugfs_create_file("io_tlb_used", 0400, mem->debugfs, mem,
&fops_io_tlb_used);
+ debugfs_create_file("io_tlb_used_hiwater", 0600, mem->debugfs, mem,
+ &fops_io_tlb_hiwater);
}

static int __init __maybe_unused swiotlb_create_default_debugfs(void)
--
1.8.3.1