Re: [PATCH v1 2/5] lib/bitmap: Introduce bitmap_scatter() and bitmap_gather() helpers

From: Yury Norov
Date: Tue Sep 26 2023 - 21:25:13 EST


On Tue, Sep 26, 2023 at 08:20:04AM +0300, Andy Shevchenko wrote:
> These helpers are the optimized versions of the bitmap_remap()
> where one of the bitmaps (source or destination) is of sequential bits.

If so, can you add a test that makes sure that the new API is consistent
with the old bitmap_remap? And also provide numbers showing how well they
are optimized compared to bitmap_remap.

> See more in the kernel documentation of the helpers.

I grepped the whole kernel, not only Documentation directory, and found
nothing...

> Signed-off-by: Andy Shevchenko <andriy.shevchenko@xxxxxxxxxxxxxxx>
> ---
> include/linux/bitmap.h | 9 ++++++
> lib/bitmap.c | 70 ++++++++++++++++++++++++++++++++++++++++++
> lib/test_bitmap.c | 23 ++++++++++++++
> 3 files changed, 102 insertions(+)
>
> diff --git a/include/linux/bitmap.h b/include/linux/bitmap.h
> index 1516ff979315..87013b9a7dd8 100644
> --- a/include/linux/bitmap.h
> +++ b/include/linux/bitmap.h
> @@ -60,6 +60,8 @@ struct device;
> * bitmap_shift_left(dst, src, n, nbits) *dst = *src << n
> * bitmap_cut(dst, src, first, n, nbits) Cut n bits from first, copy rest
> * bitmap_replace(dst, old, new, mask, nbits) *dst = (*old & ~(*mask)) | (*new & *mask)
> + * bitmap_scatter(dst, src, mask, nbits) *dst = map(dense, sparse)(src)
> + * bitmap_gather(dst, src, mask, nbits) *dst = map(sparse, dense)(src)
> * bitmap_remap(dst, src, old, new, nbits) *dst = map(old, new)(src)
> * bitmap_bitremap(oldbit, old, new, nbits) newbit = map(old, new)(oldbit)
> * bitmap_onto(dst, orig, relmap, nbits) *dst = orig relative to relmap
> @@ -208,6 +210,12 @@ int bitmap_parselist(const char *buf, unsigned long *maskp,
> int nmaskbits);
> int bitmap_parselist_user(const char __user *ubuf, unsigned int ulen,
> unsigned long *dst, int nbits);
> +
> +unsigned int bitmap_scatter(unsigned long *dst, const unsigned long *src,
> + const unsigned long *mask, unsigned int nbits);
> +unsigned int bitmap_gather(unsigned long *dst, const unsigned long *src,
> + const unsigned long *mask, unsigned int nbits);
> +
> void bitmap_remap(unsigned long *dst, const unsigned long *src,
> const unsigned long *old, const unsigned long *new, unsigned int nbits);
> int bitmap_bitremap(int oldbit,
> @@ -216,6 +224,7 @@ void bitmap_onto(unsigned long *dst, const unsigned long *orig,
> const unsigned long *relmap, unsigned int bits);
> void bitmap_fold(unsigned long *dst, const unsigned long *orig,
> unsigned int sz, unsigned int nbits);
> +
> int bitmap_find_free_region(unsigned long *bitmap, unsigned int bits, int order);
> void bitmap_release_region(unsigned long *bitmap, unsigned int pos, int order);
> int bitmap_allocate_region(unsigned long *bitmap, unsigned int pos, int order);
> diff --git a/lib/bitmap.c b/lib/bitmap.c
> index 935e0f96e785..31cfc7846aae 100644
> --- a/lib/bitmap.c
> +++ b/lib/bitmap.c
> @@ -942,6 +942,76 @@ int bitmap_parse(const char *start, unsigned int buflen,
> }
> EXPORT_SYMBOL(bitmap_parse);
>
> +/**
> + * bitmap_scatter - Scatter a bitmap according to the given mask
> + * @dst: scattered bitmap
> + * @src: gathered bitmap
> + * @mask: bits to assign to in the scattered bitmap
> + * @nbits: number of bits in each of these bitmaps
> + *
> + * Scatters bitmap with sequential bits according to the given @mask.
> + *
> + * Example:
> + * If @src bitmap = 0x005a, with @mask = 0x1313, @dst will be 0x0302.
> + *
> + * Or in binary form
> + * @src @mask @dst
> + * 0000000001011010 0001001100010011 0000001100000010
> + *
> + * (Bits 0, 1, 2, 3, 4, 5 are copied to the bits 0, 1, 4, 8, 9, 12)
> + *
> + * Returns: the weight of the @mask.

Returning the weight of the mask is somewhat non-obvious... To me it
would be more logical to return the weight of the destination, for example...

But I see that in the following patch you're using the returned value.
Maybe add a few words to justify that?

> + */
> +unsigned int bitmap_scatter(unsigned long *dst, const unsigned long *src,
> + const unsigned long *mask, unsigned int nbits)
> +{
> + unsigned int bit;
> + int n = 0;

Is n signed on purpose? I think it should be consistent with the
return value.

> +
> + bitmap_zero(dst, nbits);
> +
> + for_each_set_bit(bit, mask, nbits)
> + __assign_bit(bit, dst, test_bit(n++, src));
> +
> + return n;
> +}
> +EXPORT_SYMBOL(bitmap_scatter);
> +
> +/**
> + * bitmap_gather - Gather a bitmap according to given mask
> + * @dst: gathered bitmap
> + * @src: scattered bitmap
> + * @mask: bits to extract from in the scattered bitmap
> + * @nbits: number of bits in each of these bitmaps
> + *
> + * Gathers bitmap with sparse bits according to the given @mask.
> + *
> + * Example:
> + * If @src bitmap = 0x0302, with @mask = 0x1313, @dst will be 0x001a.

Not sure about others, but to me the hex representation is quite useless;
moreover, it's followed by the binary one.

> + * Or in binary form
> + * @src @mask @dst
> + * 0000001100000010 0001001100010011 0000000000011010
> + *
> + * (Bits 0, 1, 4, 8, 9, 12 are copied to the bits 0, 1, 2, 3, 4, 5)
> + *
> + * Returns: the weight of the @mask.
> + */

It looks like those are designed to complement each other. Is that
true? If so, can you make your example show that
scatter -> gather -> scatter
would restore the original bitmap?

If I'm wrong, can you please underline that they are not complementary,
and why?

> +unsigned int bitmap_gather(unsigned long *dst, const unsigned long *src,
> + const unsigned long *mask, unsigned int nbits)
> +{
> + unsigned int bit;
> + int n = 0;
> +
> + bitmap_zero(dst, nbits);
> +
> + for_each_set_bit(bit, mask, nbits)
> + __assign_bit(n++, dst, test_bit(bit, src));
> +
> + return n;
> +}
> +EXPORT_SYMBOL(bitmap_gather);

I feel like they should reside in the header, because they are quite small
functions indeed, and they would benefit from compile-time optimizations
without bloating the kernel.

Moreover, you are using them in patch #3 on 64-bit bitmaps, which
would benefit from the small_const_nbits() optimization.

> +
> /**
> * bitmap_pos_to_ord - find ordinal of set bit at given position in bitmap
> * @buf: pointer to a bitmap
> diff --git a/lib/test_bitmap.c b/lib/test_bitmap.c
> index 1f2dc7fef17f..f43a07679998 100644
> --- a/lib/test_bitmap.c
> +++ b/lib/test_bitmap.c
> @@ -50,6 +50,9 @@ static const unsigned long exp2[] __initconst = {
> static const unsigned long exp2_to_exp3_mask[] __initconst = {
> BITMAP_FROM_U64(0x008000020020212eULL),
> };
> +static const unsigned long exp2_to_exp3_maskg[] __initconst = {
> + BITMAP_FROM_U64(0x00000000000001ffULL),
> +};
> /* exp3_0_1 = (exp2[0] & ~exp2_to_exp3_mask) | (exp2[1] & exp2_to_exp3_mask) */
> static const unsigned long exp3_0_1[] __initconst = {
> BITMAP_FROM_U64(0x33b3333311313137ULL),
> @@ -357,6 +360,25 @@ static void __init test_replace(void)
> expect_eq_bitmap(bmap, exp3_1_0, nbits);
> }
>
> +static void __init test_bitmap_sg(void)
> +{
> + unsigned int nbits = 64;
> + DECLARE_BITMAP(bmap, 1024);

Can you make it 1000? That way we'll test the non-aligned case.

> + unsigned int w;
> +
> + bitmap_zero(bmap, 1024);
> + w = bitmap_gather(bmap, exp2_to_exp3_mask, exp2_to_exp3_mask, nbits);
> + expect_eq_uint(bitmap_weight(exp2_to_exp3_mask, nbits), w);
> + expect_eq_uint(bitmap_weight(bmap, 1024), w);
> + expect_eq_bitmap(bmap, exp2_to_exp3_maskg, nbits);
> +
> + bitmap_zero(bmap, 1024);
> + w = bitmap_scatter(bmap, exp2_to_exp3_maskg, exp2_to_exp3_mask, nbits);
> + expect_eq_uint(bitmap_weight(exp2_to_exp3_maskg, nbits), w);
> + expect_eq_uint(bitmap_weight(bmap, 1024), w);
> + expect_eq_bitmap(bmap, exp2_to_exp3_mask, nbits);

It would be interesting to compare bitmap scatter/gather performance
against bitmap_remap.

> +}
> +
> #define PARSE_TIME 0x1
> #define NO_LEN 0x2
>
> @@ -1228,6 +1250,7 @@ static void __init selftest(void)
> test_fill_set();
> test_copy();
> test_replace();
> + test_bitmap_sg();
> test_bitmap_arr32();
> test_bitmap_arr64();
> test_bitmap_parse();
> --
> 2.40.0.1.gaa8946217a0b