[PATCH resend] rhashtable: detect when object movement might have invalidated a lookup

From: NeilBrown
Date: Fri Jul 06 2018 - 03:08:50 EST



Some users of rhashtable might need to change the key
of an object and move it to a different location in the table.
Other users might want to allocate objects using
SLAB_TYPESAFE_BY_RCU which can result in the same memory allocation
being used for a different (type-compatible) purpose and similarly
end up in a different hash-chain.

To support these, we store a unique NULLS_MARKER at the end of
each chain, and when a search fails to find a match, we check
if the NULLS marker found was the expected one. If not,
the search is repeated.

The unique NULLS_MARKER is derived from the address of the
head of the chain.

If an object is removed and re-added to the same hash chain, we won't
notice by looking that the NULLS marker. In this case we must be sure
that it was not re-added *after* its original location, or a lookup may
incorrectly fail. The easiest solution is to ensure it is inserted at
the start of the chain. insert_slow() already does that,
insert_fast() does not. So this patch changes insert_fast to always
insert at the head of the chain.

Note that such a user must do their own double-checking of
the object found by rhashtable_lookup_fast() after ensuring
mutual exclusion which anything that might change the key, such as
successfully taking a new reference.

Signed-off-by: NeilBrown <neilb@xxxxxxxx>
---

I'm resending this unchanged. Herbert wasn't sure if we needed all the
functionality provided. I explained that it was useful when
SLAB_TYPESAFE_BY_RCU slabs are used. No further discussion happened.

Thanks,
NeilBrown



include/linux/rhashtable.h | 35 +++++++++++++++++++++++------------
lib/rhashtable.c | 8 +++++---
2 files changed, 28 insertions(+), 15 deletions(-)

diff --git a/include/linux/rhashtable.h b/include/linux/rhashtable.h
index eb7111039247..10435a77b156 100644
--- a/include/linux/rhashtable.h
+++ b/include/linux/rhashtable.h
@@ -75,8 +75,10 @@ struct bucket_table {
struct rhash_head __rcu *buckets[] ____cacheline_aligned_in_smp;
};

+#define RHT_NULLS_MARKER(ptr) \
+ ((void *)NULLS_MARKER(((unsigned long) (ptr)) >> 1))
#define INIT_RHT_NULLS_HEAD(ptr) \
- ((ptr) = (typeof(ptr)) NULLS_MARKER(0))
+ ((ptr) = RHT_NULLS_MARKER(&(ptr)))

static inline bool rht_is_a_nulls(const struct rhash_head *ptr)
{
@@ -471,6 +473,7 @@ static inline struct rhash_head *__rhashtable_lookup(
.ht = ht,
.key = key,
};
+ struct rhash_head __rcu * const *head;
struct bucket_table *tbl;
struct rhash_head *he;
unsigned int hash;
@@ -478,13 +481,19 @@ static inline struct rhash_head *__rhashtable_lookup(
tbl = rht_dereference_rcu(ht->tbl, ht);
restart:
hash = rht_key_hashfn(ht, tbl, key, params);
- rht_for_each_rcu(he, tbl, hash) {
- if (params.obj_cmpfn ?
- params.obj_cmpfn(&arg, rht_obj(ht, he)) :
- rhashtable_compare(&arg, rht_obj(ht, he)))
- continue;
- return he;
- }
+ head = rht_bucket(tbl, hash);
+ do {
+ rht_for_each_rcu_continue(he, *head, tbl, hash) {
+ if (params.obj_cmpfn ?
+ params.obj_cmpfn(&arg, rht_obj(ht, he)) :
+ rhashtable_compare(&arg, rht_obj(ht, he)))
+ continue;
+ return he;
+ }
+ /* An object might have been moved to a different hash chain,
+ * while we walk along it - better check and retry.
+ */
+ } while (he != RHT_NULLS_MARKER(head));

/* Ensure we see any new tables. */
smp_rmb();
@@ -580,6 +589,7 @@ static inline void *__rhashtable_insert_fast(
.ht = ht,
.key = key,
};
+ struct rhash_head __rcu **headp;
struct rhash_head __rcu **pprev;
struct bucket_table *tbl;
struct rhash_head *head;
@@ -603,12 +613,13 @@ static inline void *__rhashtable_insert_fast(
}

elasticity = RHT_ELASTICITY;
- pprev = rht_bucket_insert(ht, tbl, hash);
+ headp = rht_bucket_insert(ht, tbl, hash);
+ pprev = headp;
data = ERR_PTR(-ENOMEM);
if (!pprev)
goto out;

- rht_for_each_continue(head, *pprev, tbl, hash) {
+ rht_for_each_continue(head, *headp, tbl, hash) {
struct rhlist_head *plist;
struct rhlist_head *list;

@@ -648,7 +659,7 @@ static inline void *__rhashtable_insert_fast(
if (unlikely(rht_grow_above_100(ht, tbl)))
goto slow_path;

- head = rht_dereference_bucket(*pprev, tbl, hash);
+ head = rht_dereference_bucket(*headp, tbl, hash);

RCU_INIT_POINTER(obj->next, head);
if (rhlist) {
@@ -658,7 +669,7 @@ static inline void *__rhashtable_insert_fast(
RCU_INIT_POINTER(list->next, NULL);
}

- rcu_assign_pointer(*pprev, obj);
+ rcu_assign_pointer(*headp, obj);

atomic_inc(&ht->nelems);
if (rht_grow_above_75(ht, tbl))
diff --git a/lib/rhashtable.c b/lib/rhashtable.c
index 0e04947b7e0c..f87af707f086 100644
--- a/lib/rhashtable.c
+++ b/lib/rhashtable.c
@@ -1164,8 +1164,7 @@ struct rhash_head __rcu **rht_bucket_nested(const struct bucket_table *tbl,
unsigned int hash)
{
const unsigned int shift = PAGE_SHIFT - ilog2(sizeof(void *));
- static struct rhash_head __rcu *rhnull =
- (struct rhash_head __rcu *)NULLS_MARKER(0);
+ static struct rhash_head __rcu *rhnull;
unsigned int index = hash & ((1 << tbl->nest) - 1);
unsigned int size = tbl->size >> tbl->nest;
unsigned int subhash = hash;
@@ -1183,8 +1182,11 @@ struct rhash_head __rcu **rht_bucket_nested(const struct bucket_table *tbl,
subhash >>= shift;
}

- if (!ntbl)
+ if (!ntbl) {
+ if (!rhnull)
+ INIT_RHT_NULLS_HEAD(rhnull, NULL, 0);
return &rhnull;
+ }

return &ntbl[subhash].bucket;

--
2.14.0.rc0.dirty

Attachment: signature.asc
Description: PGP signature