[PATCH] scatterlist: Speed up for_each_sg() loop macro

From: Sultan Alsawaf
Date: Fri Oct 25 2019 - 17:34:19 EST


From: Sultan Alsawaf <sultan@xxxxxxxxxxxxxxx>

Scatterlists are chained in predictable arrays of up to
SG_MAX_SINGLE_ALLOC sg structs in length. Using this knowledge, speed up
for_each_sg() by using constant operations to determine when to simply
increment the sg pointer by one or get the next sg array in the chain.

Rudimentary measurements with a trivial loop body show that this yields
roughly a 2x performance gain.

The following simple test module proves the correctness of the new loop
definition by testing all the different edge cases of sg chains:
#include <linux/module.h>
#include <linux/scatterlist.h>
#include <linux/slab.h>

static int __init test_for_each_sg(void)
{
static const gfp_t gfp_flags = GFP_KERNEL | __GFP_NOFAIL;
struct scatterlist *sg;
struct sg_table *table;
long old = 0, new = 0;
unsigned int i, nents;

table = kmalloc(sizeof(*table), gfp_flags);
for (nents = 1; nents <= 3 * SG_MAX_SINGLE_ALLOC; nents++) {
BUG_ON(sg_alloc_table(table, nents, gfp_flags));
for (sg = table->sgl; sg; sg = sg_next(sg))
old ^= (long)sg;
for_each_sg(table->sgl, sg, nents, i)
new ^= (long)sg;
sg_free_table(table);
}

BUG_ON(old != new);
kfree(table);
return 0;
}
module_init(test_for_each_sg);

Signed-off-by: Sultan Alsawaf <sultan@xxxxxxxxxxxxxxx>
---
include/linux/scatterlist.h | 5 ++++-
1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/include/linux/scatterlist.h b/include/linux/scatterlist.h
index 556ec1ea2574..73f7fd6702d7 100644
--- a/include/linux/scatterlist.h
+++ b/include/linux/scatterlist.h
@@ -146,7 +146,10 @@ static inline void sg_set_buf(struct scatterlist *sg, const void *buf,
* Loop over each sg element, following the pointer to a new list if necessary
*/
#define for_each_sg(sglist, sg, nr, __i) \
- for (__i = 0, sg = (sglist); __i < (nr); __i++, sg = sg_next(sg))
+ for (__i = 0, sg = (sglist); __i < (nr); \
+ likely(++__i % (SG_MAX_SINGLE_ALLOC - 1) || \
+ (__i + 1) >= (nr)) ? sg++ : \
+ (sg = sg_chain_ptr(sg + 1)))

/**
* sg_chain - Chain two sglists together
--
2.23.0