Re: [PATCH] libbpf hashmap: Fix undefined behavior in hash_bits

From: Ian Rogers
Date: Thu Oct 29 2020 - 16:58:35 EST


On Thu, Oct 29, 2020 at 1:16 PM Andrii Nakryiko
<andrii.nakryiko@xxxxxxxxx> wrote:
>
> On Thu, Oct 29, 2020 at 9:11 AM Ian Rogers <irogers@xxxxxxxxxx> wrote:
> >
> > If bits is 0, the case when the map is empty, then the >> is the size of
> > the register which is undefined behavior - on x86 it is the same as a
> > shift by 0. Fix by handling the 0 case explicitly when running with
> > address sanitizer.
> >
> > A variant of this patch was posted previously as:
> > https://lore.kernel.org/lkml/20200508063954.256593-1-irogers@xxxxxxxxxx/
> >
> > Signed-off-by: Ian Rogers <irogers@xxxxxxxxxx>
> > ---
> > tools/lib/bpf/hashmap.h | 14 ++++++++++++++
> > 1 file changed, 14 insertions(+)
> >
> > diff --git a/tools/lib/bpf/hashmap.h b/tools/lib/bpf/hashmap.h
> > index d9b385fe808c..27d0556527d3 100644
> > --- a/tools/lib/bpf/hashmap.h
> > +++ b/tools/lib/bpf/hashmap.h
> > @@ -12,9 +12,23 @@
> > #include <stddef.h>
> > #include <limits.h>
> >
> > +#ifdef __has_feature
> > +#define HAVE_FEATURE(f) __has_feature(f)
> > +#else
> > +#define HAVE_FEATURE(f) 0
> > +#endif
> > +
> > static inline size_t hash_bits(size_t h, int bits)
> > {
> > /* shuffle bits and return requested number of upper bits */
> > +#if defined(ADDRESS_SANITIZER) || HAVE_FEATURE(address_sanitizer)
> > + /*
> > + * If the requested bits == 0 avoid undefined behavior from a
> > + * greater-than bit width shift right (aka invalid-shift-exponent).
> > + */
> > + if (bits == 0)
> > + return -1;
> > +#endif
>
> Oh, just too much # magic here :(... If we want to prevent hash_bits()
> from being called with bits == 0 (despite the result never used),
> let's just adjust hashmap__for_each_key_entry and
> hashmap__for_each_key_entry_safe macros:
>
> diff --git a/tools/lib/bpf/hashmap.h b/tools/lib/bpf/hashmap.h
> index d9b385fe808c..488e0ef236cb 100644
> --- a/tools/lib/bpf/hashmap.h
> +++ b/tools/lib/bpf/hashmap.h
> @@ -174,9 +174,9 @@ bool hashmap__find(const struct hashmap *map,
> const void *key, void **value);
> * @key: key to iterate entries for
> */
> #define hashmap__for_each_key_entry(map, cur, _key) \
> - for (cur = ({ size_t bkt = hash_bits(map->hash_fn((_key), map->ctx),\
> - map->cap_bits); \
> - map->buckets ? map->buckets[bkt] : NULL; }); \
> + for (cur = map->buckets \
> + ? map->buckets[hash_bits(map->hash_fn((_key),
> map->ctx), map->cap_bits)] \
> + : NULL; \
> cur; \
> cur = cur->next) \
> if (map->equal_fn(cur->key, (_key), map->ctx))
>
> Either way it's a bit ugly and long, but at least we don't have extra
> #-driven ugliness.


This can work with the following changes in hashmap.c. I'll resend
this as a whole patch.

Thanks,
Ian

--- a/tools/lib/bpf/hashmap.c
+++ b/tools/lib/bpf/hashmap.c
@@ -156,7 +156,7 @@ int hashmap__insert(struct hashmap *map,
const void **old_key, void **old_value)
{
struct hashmap_entry *entry;
- size_t h;
+ size_t h = 0;
int err;

if (old_key)
@@ -164,7 +164,9 @@ int hashmap__insert(struct hashmap *map,
if (old_value)
*old_value = NULL;

- h = hash_bits(map->hash_fn(key, map->ctx), map->cap_bits);
+ if (map->buckets)
+ h = hash_bits(map->hash_fn(key, map->ctx), map->cap_bits);
+
if (strategy != HASHMAP_APPEND &&
hashmap_find_entry(map, key, h, NULL, &entry)) {
if (old_key)
@@ -208,6 +210,9 @@ bool hashmap__find(const struct hashmap
struct hashmap_entry *entry;
size_t h;

+ if (!map->buckets)
+ return false;
+
h = hash_bits(map->hash_fn(key, map->ctx), map->cap_bits);
if (!hashmap_find_entry(map, key, h, NULL, &entry))
return false;
@@ -223,6 +228,9 @@ bool hashmap__delete(struct hashmap *map
struct hashmap_entry **pprev, *entry;
size_t h;

+ if (!map->buckets)
+ return false;
+
h = hash_bits(map->hash_fn(key, map->ctx), map->cap_bits);
if (!hashmap_find_entry(map, key, h, &pprev, &entry))
return false;


> > #if (__SIZEOF_SIZE_T__ == __SIZEOF_LONG_LONG__)
> > /* LP64 case */
> > return (h * 11400714819323198485llu) >> (__SIZEOF_LONG_LONG__ * 8 - bits);
> > --
> > 2.29.1.341.ge80a0c044ae-goog
> >