[PATCH net-next 1/7] dma: compile-out DMA sync op calls when not used

From: Alexander Lobakin
Date: Fri Jan 26 2024 - 08:56:31 EST


Some platforms do have DMA, but DMA there is always direct and coherent.
Currently, DMA sync operations are still compiled in and called even on
such platforms.
Add a new hidden Kconfig symbol, DMA_NEED_SYNC, and set it only when
either sync operations are needed, or DMA ops or swiotlb is enabled.
Define dma_need_sync() and dma_skip_sync() (a stub for now) depending
on this symbol's state, and don't call the sync ops when
dma_skip_sync() is true.
The change allows for future optimizations of DMA sync calls depending
on compile-time or runtime conditions.

Signed-off-by: Alexander Lobakin <aleksander.lobakin@xxxxxxxxx>
---
kernel/dma/Kconfig | 4 ++
include/linux/dma-mapping.h | 92 +++++++++++++++++++++++++------------
kernel/dma/mapping.c | 26 ++++++-----
3 files changed, 81 insertions(+), 41 deletions(-)

diff --git a/kernel/dma/Kconfig b/kernel/dma/Kconfig
index d62f5957f36b..1c9ff05b1ecb 100644
--- a/kernel/dma/Kconfig
+++ b/kernel/dma/Kconfig
@@ -107,6 +107,10 @@ config DMA_BOUNCE_UNALIGNED_KMALLOC
bool
depends on SWIOTLB

+config DMA_NEED_SYNC
+ def_bool ARCH_HAS_SYNC_DMA_FOR_DEVICE || ARCH_HAS_SYNC_DMA_FOR_CPU || \
+ ARCH_HAS_SYNC_DMA_FOR_CPU_ALL || DMA_OPS || SWIOTLB
+
config DMA_RESTRICTED_POOL
bool "DMA Restricted Pool"
depends on OF && OF_RESERVED_MEM && SWIOTLB
diff --git a/include/linux/dma-mapping.h b/include/linux/dma-mapping.h
index 4a658de44ee9..9dd7e1578bf6 100644
--- a/include/linux/dma-mapping.h
+++ b/include/linux/dma-mapping.h
@@ -117,14 +117,14 @@ dma_addr_t dma_map_resource(struct device *dev, phys_addr_t phys_addr,
size_t size, enum dma_data_direction dir, unsigned long attrs);
void dma_unmap_resource(struct device *dev, dma_addr_t addr, size_t size,
enum dma_data_direction dir, unsigned long attrs);
-void dma_sync_single_for_cpu(struct device *dev, dma_addr_t addr, size_t size,
- enum dma_data_direction dir);
-void dma_sync_single_for_device(struct device *dev, dma_addr_t addr,
- size_t size, enum dma_data_direction dir);
-void dma_sync_sg_for_cpu(struct device *dev, struct scatterlist *sg,
- int nelems, enum dma_data_direction dir);
-void dma_sync_sg_for_device(struct device *dev, struct scatterlist *sg,
- int nelems, enum dma_data_direction dir);
+void __dma_sync_single_for_cpu(struct device *dev, dma_addr_t addr,
+ size_t size, enum dma_data_direction dir);
+void __dma_sync_single_for_device(struct device *dev, dma_addr_t addr,
+ size_t size, enum dma_data_direction dir);
+void __dma_sync_sg_for_cpu(struct device *dev, struct scatterlist *sg,
+ int nelems, enum dma_data_direction dir);
+void __dma_sync_sg_for_device(struct device *dev, struct scatterlist *sg,
+ int nelems, enum dma_data_direction dir);
void *dma_alloc_attrs(struct device *dev, size_t size, dma_addr_t *dma_handle,
gfp_t flag, unsigned long attrs);
void dma_free_attrs(struct device *dev, size_t size, void *cpu_addr,
@@ -147,7 +147,6 @@ u64 dma_get_required_mask(struct device *dev);
bool dma_addressing_limited(struct device *dev);
size_t dma_max_mapping_size(struct device *dev);
size_t dma_opt_mapping_size(struct device *dev);
-bool dma_need_sync(struct device *dev, dma_addr_t dma_addr);
unsigned long dma_get_merge_boundary(struct device *dev);
struct sg_table *dma_alloc_noncontiguous(struct device *dev, size_t size,
enum dma_data_direction dir, gfp_t gfp, unsigned long attrs);
@@ -195,20 +194,24 @@ static inline void dma_unmap_resource(struct device *dev, dma_addr_t addr,
size_t size, enum dma_data_direction dir, unsigned long attrs)
{
}
-static inline void dma_sync_single_for_cpu(struct device *dev, dma_addr_t addr,
- size_t size, enum dma_data_direction dir)
+static inline void __dma_sync_single_for_cpu(struct device *dev,
+ dma_addr_t addr, size_t size,
+ enum dma_data_direction dir)
{
}
-static inline void dma_sync_single_for_device(struct device *dev,
- dma_addr_t addr, size_t size, enum dma_data_direction dir)
+static inline void __dma_sync_single_for_device(struct device *dev,
+ dma_addr_t addr, size_t size,
+ enum dma_data_direction dir)
{
}
-static inline void dma_sync_sg_for_cpu(struct device *dev,
- struct scatterlist *sg, int nelems, enum dma_data_direction dir)
+static inline void __dma_sync_sg_for_cpu(struct device *dev,
+ struct scatterlist *sg, int nelems,
+ enum dma_data_direction dir)
{
}
-static inline void dma_sync_sg_for_device(struct device *dev,
- struct scatterlist *sg, int nelems, enum dma_data_direction dir)
+static inline void __dma_sync_sg_for_device(struct device *dev,
+ struct scatterlist *sg, int nelems,
+ enum dma_data_direction dir)
{
}
static inline int dma_mapping_error(struct device *dev, dma_addr_t dma_addr)
@@ -277,10 +280,6 @@ static inline size_t dma_opt_mapping_size(struct device *dev)
{
return 0;
}
-static inline bool dma_need_sync(struct device *dev, dma_addr_t dma_addr)
-{
- return false;
-}
static inline unsigned long dma_get_merge_boundary(struct device *dev)
{
return 0;
@@ -348,20 +347,55 @@ static inline void dma_unmap_single_attrs(struct device *dev, dma_addr_t addr,
return dma_unmap_page_attrs(dev, addr, size, dir, attrs);
}

-static inline void dma_sync_single_range_for_cpu(struct device *dev,
- dma_addr_t addr, unsigned long offset, size_t size,
- enum dma_data_direction dir)
+static inline void
+__dma_sync_single_range_for_cpu(struct device *dev, dma_addr_t addr,
+ unsigned long offset, size_t size,
+ enum dma_data_direction dir)
{
- return dma_sync_single_for_cpu(dev, addr + offset, size, dir);
+ __dma_sync_single_for_cpu(dev, addr + offset, size, dir);
}

-static inline void dma_sync_single_range_for_device(struct device *dev,
- dma_addr_t addr, unsigned long offset, size_t size,
- enum dma_data_direction dir)
+static inline void
+__dma_sync_single_range_for_device(struct device *dev, dma_addr_t addr,
+ unsigned long offset, size_t size,
+ enum dma_data_direction dir)
{
- return dma_sync_single_for_device(dev, addr + offset, size, dir);
+ __dma_sync_single_for_device(dev, addr + offset, size, dir);
}

+#ifdef CONFIG_DMA_NEED_SYNC
+
+#define dma_skip_sync(dev) false
+
+bool dma_need_sync(struct device *dev, dma_addr_t dma_addr);
+
+#else /* !CONFIG_DMA_NEED_SYNC */
+
+#define dma_skip_sync(dev) true
+#define dma_need_sync(dev, dma_addr) false
+
+#endif /* !CONFIG_DMA_NEED_SYNC */
+
+#define dma_check_sync(op, dev, ...) \
+ do { \
+ if (!dma_skip_sync(dev)) \
+ op(dev, __VA_ARGS__); \
+ } while (0)
+
+#define dma_sync_single_for_cpu(d, a, s, r) \
+ dma_check_sync(__dma_sync_single_for_cpu, d, a, s, r)
+#define dma_sync_single_for_device(d, a, s, r) \
+ dma_check_sync(__dma_sync_single_for_device, d, a, s, r)
+#define dma_sync_sg_for_cpu(d, s, n, r) \
+ dma_check_sync(__dma_sync_sg_for_cpu, d, s, n, r)
+#define dma_sync_sg_for_device(d, s, n, r) \
+ dma_check_sync(__dma_sync_sg_for_device, d, s, n, r)
+
+#define dma_sync_single_range_for_cpu(d, a, o, s, r) \
+ dma_check_sync(__dma_sync_single_range_for_cpu, d, a, o, s, r)
+#define dma_sync_single_range_for_device(d, a, o, s, r) \
+ dma_check_sync(__dma_sync_single_range_for_device, d, a, o, s, r)
+
/**
* dma_unmap_sgtable - Unmap the given buffer for DMA
* @dev: The device for which to perform the DMA operation
diff --git a/kernel/dma/mapping.c b/kernel/dma/mapping.c
index 58db8fd70471..a30f37f9d4db 100644
--- a/kernel/dma/mapping.c
+++ b/kernel/dma/mapping.c
@@ -329,8 +329,8 @@ void dma_unmap_resource(struct device *dev, dma_addr_t addr, size_t size,
}
EXPORT_SYMBOL(dma_unmap_resource);

-void dma_sync_single_for_cpu(struct device *dev, dma_addr_t addr, size_t size,
- enum dma_data_direction dir)
+void __dma_sync_single_for_cpu(struct device *dev, dma_addr_t addr,
+ size_t size, enum dma_data_direction dir)
{
const struct dma_map_ops *ops = get_dma_ops(dev);

@@ -341,10 +341,10 @@ void dma_sync_single_for_cpu(struct device *dev, dma_addr_t addr, size_t size,
ops->sync_single_for_cpu(dev, addr, size, dir);
debug_dma_sync_single_for_cpu(dev, addr, size, dir);
}
-EXPORT_SYMBOL(dma_sync_single_for_cpu);
+EXPORT_SYMBOL(__dma_sync_single_for_cpu);

-void dma_sync_single_for_device(struct device *dev, dma_addr_t addr,
- size_t size, enum dma_data_direction dir)
+void __dma_sync_single_for_device(struct device *dev, dma_addr_t addr,
+ size_t size, enum dma_data_direction dir)
{
const struct dma_map_ops *ops = get_dma_ops(dev);

@@ -355,10 +355,10 @@ void dma_sync_single_for_device(struct device *dev, dma_addr_t addr,
ops->sync_single_for_device(dev, addr, size, dir);
debug_dma_sync_single_for_device(dev, addr, size, dir);
}
-EXPORT_SYMBOL(dma_sync_single_for_device);
+EXPORT_SYMBOL(__dma_sync_single_for_device);

-void dma_sync_sg_for_cpu(struct device *dev, struct scatterlist *sg,
- int nelems, enum dma_data_direction dir)
+void __dma_sync_sg_for_cpu(struct device *dev, struct scatterlist *sg,
+ int nelems, enum dma_data_direction dir)
{
const struct dma_map_ops *ops = get_dma_ops(dev);

@@ -369,10 +369,10 @@ void dma_sync_sg_for_cpu(struct device *dev, struct scatterlist *sg,
ops->sync_sg_for_cpu(dev, sg, nelems, dir);
debug_dma_sync_sg_for_cpu(dev, sg, nelems, dir);
}
-EXPORT_SYMBOL(dma_sync_sg_for_cpu);
+EXPORT_SYMBOL(__dma_sync_sg_for_cpu);

-void dma_sync_sg_for_device(struct device *dev, struct scatterlist *sg,
- int nelems, enum dma_data_direction dir)
+void __dma_sync_sg_for_device(struct device *dev, struct scatterlist *sg,
+ int nelems, enum dma_data_direction dir)
{
const struct dma_map_ops *ops = get_dma_ops(dev);

@@ -383,7 +383,7 @@ void dma_sync_sg_for_device(struct device *dev, struct scatterlist *sg,
ops->sync_sg_for_device(dev, sg, nelems, dir);
debug_dma_sync_sg_for_device(dev, sg, nelems, dir);
}
-EXPORT_SYMBOL(dma_sync_sg_for_device);
+EXPORT_SYMBOL(__dma_sync_sg_for_device);

/*
* The whole dma_get_sgtable() idea is fundamentally unsafe - it seems
@@ -841,6 +841,7 @@ size_t dma_opt_mapping_size(struct device *dev)
}
EXPORT_SYMBOL_GPL(dma_opt_mapping_size);

+#ifdef CONFIG_DMA_NEED_SYNC
bool dma_need_sync(struct device *dev, dma_addr_t dma_addr)
{
const struct dma_map_ops *ops = get_dma_ops(dev);
@@ -850,6 +851,7 @@ bool dma_need_sync(struct device *dev, dma_addr_t dma_addr)
return ops->sync_single_for_cpu || ops->sync_single_for_device;
}
EXPORT_SYMBOL_GPL(dma_need_sync);
+#endif /* CONFIG_DMA_NEED_SYNC */

unsigned long dma_get_merge_boundary(struct device *dev)
{
--
2.43.0