[PATCH v2 5/7] swiotlb: Add a boot option to enable dynamic bounce buffers

From: Petr Tesarik
Date: Wed Apr 19 2023 - 06:07:25 EST


From: Petr Tesarik <petr.tesarik.ext@xxxxxxxxxx>

The main goal of allocating bounce buffers dynamically is to allow
allocating a minimal fixed swiotlb at boot time but avoid hard
limits on the amount of I/O that can be handled later.

Compared to fixed IO TLB slots, dynamic allocation of bounce buffers
typically increases the worst-case I/O latency and may also reduce
performance for some workloads.

I did some basic testing with fio against a QEMU SATA drive backed
by a RAM block device in the host to minimize external factors. The
kernel was booted with "swiotlb=force,dynamic". I performed testing
of single-threaded I/O of 4-KiB segments, single-threaded I/O of
1-MiB segments, and 4-core parallel I/O of 64-KiB segments. The last
column is the coefficient of variation across 5 runs of the test:

                Read    Write   Coeff
single 4-KiB    +1.9%   +1.9%   1.7%
single 1-MiB    -8.1%   -8.2%   2.2%
parallel        -9.4%   -9.5%   2.6%

There is a slight increase in bandwidth for single-threaded 4-KiB
segments. This is because the buddy allocator is quite efficient for
order-0 allocations, so the overhead is offset by faster allocation
from an almost empty fixed swiotlb (which is still used for buffers
smaller than one page).

Anyway, since the feature is new and does not benefit all
workloads, make it disabled by default and let people turn it on
with "swiotlb=dynamic" if needed. Since this option can be combined
with "force", the parser is modified to allow multiple options
separated by commas.

A new bool field is added to struct io_tlb_mem to tell whether
dynamic allocations are allowed. This field is always false for DMA
restricted pools. It is also false for other software IO TLBs
unless "swiotlb=dynamic" was specified.

Signed-off-by: Petr Tesarik <petr.tesarik.ext@xxxxxxxxxx>
---
.../admin-guide/kernel-parameters.txt | 6 +++++-
include/linux/swiotlb.h | 3 ++-
kernel/dma/swiotlb.c | 20 ++++++++++++++-----
3 files changed, 22 insertions(+), 7 deletions(-)

diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt
index 6221a1d057dd..c8bc0c8b8df6 100644
--- a/Documentation/admin-guide/kernel-parameters.txt
+++ b/Documentation/admin-guide/kernel-parameters.txt
@@ -6110,14 +6110,18 @@
Execution Facility on pSeries.

swiotlb= [ARM,IA-64,PPC,MIPS,X86]
- Format: { <int> [,<int>] | force | noforce }
+ Format: { <int> [,<int>] [,<option-list>] | <option-list> }
<int> -- Number of I/O TLB slabs
<int> -- Second integer after comma. Number of swiotlb
areas with their own lock. Will be rounded up
to a power of 2.
+ <option-list> -- Comma-separated list of options.
+
+ Available options:
force -- force using of bounce buffers even if they
wouldn't be automatically used by the kernel
noforce -- Never use bounce buffers (for debugging)
+ dynamic -- allow dynamic allocation of bounce buffers

switches= [HW,M68k]

diff --git a/include/linux/swiotlb.h b/include/linux/swiotlb.h
index 0856eddb9063..e614aa0f4f64 100644
--- a/include/linux/swiotlb.h
+++ b/include/linux/swiotlb.h
@@ -98,6 +98,7 @@ struct io_tlb_mem {
bool late_alloc;
bool force_bounce;
bool for_alloc;
+ bool allow_dyn;
unsigned int nareas;
unsigned int area_nslabs;
struct io_tlb_area *areas;
@@ -142,7 +143,7 @@ static inline bool is_swiotlb_buffer(struct device *dev, phys_addr_t paddr)

return mem &&
(is_swiotlb_fixed(mem, paddr) ||
- is_swiotlb_dyn(dev, paddr));
+ (mem->allow_dyn && is_swiotlb_dyn(dev, paddr)));
}

static inline bool is_swiotlb_force_bounce(struct device *dev)
diff --git a/kernel/dma/swiotlb.c b/kernel/dma/swiotlb.c
index f4faee38ead9..4899fb0e4331 100644
--- a/kernel/dma/swiotlb.c
+++ b/kernel/dma/swiotlb.c
@@ -86,6 +86,7 @@ struct io_tlb_dyn_slot {

static bool swiotlb_force_bounce;
static bool swiotlb_force_disable;
+static bool swiotlb_dynamic;

struct io_tlb_mem io_tlb_default_mem;

@@ -167,10 +168,18 @@ setup_io_tlb_npages(char *str)
swiotlb_adjust_nareas(simple_strtoul(str, &str, 0));
if (*str == ',')
++str;
- if (!strcmp(str, "force"))
- swiotlb_force_bounce = true;
- else if (!strcmp(str, "noforce"))
- swiotlb_force_disable = true;
+ while (str && *str) {
+ char *opt = strsep(&str, ",");
+
+ if (!strcmp(opt, "force"))
+ swiotlb_force_bounce = true;
+ else if (!strcmp(opt, "noforce"))
+ swiotlb_force_disable = true;
+ else if (!strcmp(opt, "dynamic"))
+ swiotlb_dynamic = true;
+ else
+ pr_warn("Invalid swiotlb option: %s", opt);
+ }

return 0;
}
@@ -287,6 +296,7 @@ static void swiotlb_init_io_tlb_mem(struct io_tlb_mem *mem, phys_addr_t start,
mem->area_nslabs = nslabs / mem->nareas;

mem->force_bounce = swiotlb_force_bounce || (flags & SWIOTLB_FORCE);
+ mem->allow_dyn = swiotlb_dynamic;

for (i = 0; i < mem->nareas; i++) {
spin_lock_init(&mem->areas[i].lock);
@@ -1070,7 +1080,7 @@ phys_addr_t swiotlb_tbl_map_single(struct device *dev, phys_addr_t orig_addr,
}

tlb_addr = (phys_addr_t)DMA_MAPPING_ERROR;
- if (!is_swiotlb_for_alloc(dev))
+ if (mem->allow_dyn)
tlb_addr = swiotlb_dyn_map(dev, orig_addr, alloc_size,
alloc_align_mask, dir, attrs);
if (tlb_addr == (phys_addr_t)DMA_MAPPING_ERROR)
--
2.25.1