[RFC PATCH 5/5] lib/find_bit: re-use __find_first_bit() in __find_next_bit()

From: Yury Norov
Date: Thu Jul 28 2022 - 12:12:47 EST


Similarly to m68k, switch __find_first_bit() to use __find_next_bit().
The only difference between them is that 'next' should skip #start bits.
So do it in prologue, and then just call 'first' version if needed.

Signed-off-by: Yury Norov <yury.norov@xxxxxxxxx>

---

This patch is just to see how m68k approach would work in general
(x86_64) case. It almost doubles the size of find_next() functions,
and adds nothing to performance.

I would prefer not taking it upstream.

add/remove: 0/0 grow/shrink: 4/0 up/down: 332/0 (332)
Function old new delta
_find_next_zero_bit 103 191 +88
find_next_clump8 133 219 +86
_find_next_and_bit 120 203 +83
_find_next_bit 99 174 +75

v5.19-rc8 Optimized Difference (more - better)
Random dense bitmap ns ns % sigmas
find_next_bit: 721209 742050 -3 -0.09
find_next_zero_bit: 738138 920508 -25 -0.51
find_last_bit: 802393 839157 -5 -0.08
find_first_bit: 3560900 3747795 -5 -0.30
find_first_and_bit: 38601442 37423751 3 1.28
find_next_and_bit: 335574 322220 4 1.07

Random sparse bitmap
find_next_bit: 15868 13969 12 2.21
find_next_zero_bit: 1311843 1290946 2 0.18
find_last_bit: 13633 13856 -2 -0.37
find_first_bit: 1273625 1233955 3 1.01
find_first_and_bit: 8548 14974 -75 -0.43
find_next_and_bit: 8828 7766 12 1.37


lib/find_bit.c | 29 +++++++++++++++--------------
1 file changed, 15 insertions(+), 14 deletions(-)

diff --git a/lib/find_bit.c b/lib/find_bit.c
index a56872611a59..137e606a05a1 100644
--- a/lib/find_bit.c
+++ b/lib/find_bit.c
@@ -61,12 +61,15 @@ static inline unsigned long __find_next_bit(const unsigned long *addr1,
if (unlikely(start >= nbits))
return nbits;

+ if (IS_ALIGNED(start, BITS_PER_LONG))
+ goto aligned;
+
+ /* Handle 1st word. */
tmp = addr1[start / BITS_PER_LONG];
if (addr2)
tmp &= addr2[start / BITS_PER_LONG];
tmp ^= invert;

- /* Handle 1st word. */
mask = BITMAP_FIRST_WORD_MASK(start);
if (need_swab)
mask = swab(mask);
@@ -74,22 +77,20 @@ static inline unsigned long __find_next_bit(const unsigned long *addr1,
tmp &= mask;

start = round_down(start, BITS_PER_LONG);
-
- while (!tmp) {
- start += BITS_PER_LONG;
- if (start >= nbits)
- return nbits;
-
- tmp = addr1[start / BITS_PER_LONG];
- if (addr2)
- tmp &= addr2[start / BITS_PER_LONG];
- tmp ^= invert;
+ if (tmp) {
+ if (need_swab)
+ tmp = swab(tmp);
+ return min(start + __ffs(tmp), nbits);
}

- if (need_swab)
- tmp = swab(tmp);
+ start += BITS_PER_LONG;
+ if (start >= nbits)
+ return nbits;

- return min(start + __ffs(tmp), nbits);
+aligned:
+ return start + __find_first_bit(addr1 + start/BITS_PER_LONG,
+ addr2 ? addr2 + start/BITS_PER_LONG : NULL,
+ nbits - start, invert, need_swab);
}

#ifndef find_next_bit
--
2.34.1