Re: [PATCH rdma-next 4/7] RDMA/mlx5: Change the cache structure to an RB-tree

From: Jason Gunthorpe
Date: Wed Dec 08 2021 - 15:58:37 EST


On Mon, Dec 06, 2021 at 11:10:49AM +0200, Leon Romanovsky wrote:

> +static struct mlx5_cache_ent *ent_search(struct mlx5_mr_cache *cache, void *mkc)

mlx5_cache_ent_find() ?

This search isn't really what I would expect: it should stop as soon
as it finds a (somewhat) larger and otherwise compatible entry,
instead of continuing to bin on power-of-two sizes.
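
Something along these lines is what I'd expect (untested sketch,
assuming the tree is keyed so that bigger entries sort to the right;
ent_size() is a made-up helper returning the entry's size key):

static struct mlx5_cache_ent *
mlx5_cache_ent_find(struct mlx5_mr_cache *cache, size_t req_size)
{
        struct rb_node *node = cache->cache_root.rb_node;
        struct mlx5_cache_ent *best = NULL;

        while (node) {
                struct mlx5_cache_ent *cur =
                        container_of(node, struct mlx5_cache_ent, node);

                if (ent_size(cur) < req_size) {
                        /* Too small, only the right subtree can fit */
                        node = node->rb_right;
                } else {
                        /* Fits; remember it, then look for a tighter fit */
                        best = cur;
                        node = node->rb_left;
                }
        }
        return best;
}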

> +        struct rb_node *node = cache->cache_root.rb_node;
> +        int size = MLX5_ST_SZ_BYTES(mkc);

size_t not int

> +        mkc = MLX5_ADDR_OF(create_mkey_in, in, memory_key_mkey_entry);
> +        if (dev->cache.maintained_cache && !force) {
> +                int order;

unsigned int

> +                /*
> +                 * Try to get an mkey from pool.
> +                 */
> +                order = order_base_2(ndescs) > 2 ? order_base_2(ndescs) : 2;
> +                MLX5_SET(mkc, mkc, translations_octword_size,
> +                         get_mkc_octo_size(access_mode, 1 << order));
> +                mutex_lock(&dev->cache.cache_lock);
> +                ent = ent_search(&dev->cache, mkc);
> +                mutex_unlock(&dev->cache.cache_lock);

Why is it OK to drop the lock here? What is protecting the ent pointer
against free?
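
If nothing does, the lookup result probably has to be consumed while
cache_lock is still held, eg (sketch only, using the
mlx5_ent_get_mkey() split suggested below):

        mutex_lock(&dev->cache.cache_lock);
        ent = ent_search(&dev->cache, mkc);
        if (ent)
                err = mlx5_ent_get_mkey(ent, &mr->mmkey.key);
        mutex_unlock(&dev->cache.cache_lock);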

> +        if (ent && (ent->limit || force)) {

What locking protects ent->limit ?

> +                xa_lock_irq(&ent->mkeys);
> +                if (!ent->stored) {
> +                        if (ent->limit) {
> +                                queue_adjust_cache_locked(ent);
> +                                ent->miss++;
> +                        }
> +                        xa_unlock_irq(&ent->mkeys);
> +
> +                        err = mlx5_ib_create_mkey(dev, &mr->mmkey, in, inlen);
> +                        if (err)
> +                                goto err;

So if there is no entry, we create a bigger one rounded up to a power
of two?

> +
> +                        WRITE_ONCE(ent->dev->cache.last_add, jiffies);
> +                        xa_lock_irq(&ent->mkeys);
> +                        ent->total_mrs++;
> +                        xa_unlock_irq(&ent->mkeys);

May as well optimize for the normal case: do the total_mrs++ before
create_mkey while we still have the lock, and undo it if the create
fails. It is just minor stat reporting, right?
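
Ie something like (untested):

        xa_lock_irq(&ent->mkeys);
        if (ent->limit) {
                queue_adjust_cache_locked(ent);
                ent->miss++;
        }
        ent->total_mrs++;       /* assume the create will succeed */
        xa_unlock_irq(&ent->mkeys);

        err = mlx5_ib_create_mkey(dev, &mr->mmkey, in, inlen);
        if (err) {
                xa_lock_irq(&ent->mkeys);
                ent->total_mrs--;       /* undo the optimistic count */
                xa_unlock_irq(&ent->mkeys);
                goto err;
        }
        WRITE_ONCE(ent->dev->cache.last_add, jiffies);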

> +                } else {
> +                        xa_ent = __xa_store(&ent->mkeys, --ent->stored,
> +                                            NULL, GFP_KERNEL);
> +                        WARN_ON(xa_ent == NULL || xa_is_err(xa_ent));
> +                        WARN_ON(__xa_erase(&ent->mkeys, --ent->reserved) !=
> +                                NULL);

This whole bigger block wants to be in its own function,
'mlx5_ent_get_mkey()'.

Then you can write it more simply, without all the duplicated error
handling and deep indentation.
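
Roughly (untested sketch, reusing the names from this patch; -ENOENT
tells the caller to fall back to creating a new mkey):

static int mlx5_ent_get_mkey(struct mlx5_cache_ent *ent, u32 *mkey)
{
        void *xa_ent;

        xa_lock_irq(&ent->mkeys);
        if (!ent->stored) {
                if (ent->limit) {
                        queue_adjust_cache_locked(ent);
                        ent->miss++;
                }
                xa_unlock_irq(&ent->mkeys);
                return -ENOENT;
        }
        xa_ent = __xa_store(&ent->mkeys, --ent->stored, NULL, GFP_KERNEL);
        WARN_ON(xa_ent == NULL || xa_is_err(xa_ent));
        WARN_ON(__xa_erase(&ent->mkeys, --ent->reserved) != NULL);
        queue_adjust_cache_locked(ent);
        xa_unlock_irq(&ent->mkeys);

        *mkey = (u32)xa_to_value(xa_ent);
        return 0;
}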

>                          queue_adjust_cache_locked(ent);
> -                        ent->miss++;
> -                }
> -                xa_unlock_irq(&ent->mkeys);
> -                err = create_cache_mkey(ent, &mr->mmkey.key);
> -                if (err) {
> -                        kfree(mr);
> -                        return ERR_PTR(err);
> +                        xa_unlock_irq(&ent->mkeys);
> +                        mr->mmkey.key = (u32)xa_to_value(xa_ent);
>                  }
> +                mr->cache_ent = ent;
>          } else {
> -                mr = __xa_store(&ent->mkeys, --ent->stored, NULL,
> -                                GFP_KERNEL);
> -                WARN_ON(mr == NULL || xa_is_err(mr));
> -                WARN_ON(__xa_erase(&ent->mkeys, --ent->reserved) != NULL);
> -                queue_adjust_cache_locked(ent);
> -                xa_unlock_irq(&ent->mkeys);
> -
> -                mr->mmkey.key = (u32)xa_to_value(xa_ent);
> +                /*
> +                 * Can not use a cache mkey.
> +                 * Create an mkey with the exact needed size.
> +                 */
> +                MLX5_SET(mkc, mkc, translations_octword_size,
> +                         get_mkc_octo_size(access_mode, ndescs));
> +                err = mlx5_ib_create_mkey(dev, &mr->mmkey, in, inlen);
> +                if (err)
> +                        goto err;
>          }

I think this needs to be broken into functions; it is hard to read
with all these ifs and inlined locking.
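
Eg with the helpers above the main flow could collapse to something
like this (sketch only; mlx5_create_exact_mkey() and
mlx5_create_pool_mkey() are made-up names for the two create paths,
and req_size stands for whatever size key the find ends up using):

        struct mlx5_cache_ent *ent = NULL;
        int err;

        if (dev->cache.maintained_cache && !force)
                ent = mlx5_cache_ent_find(&dev->cache, req_size);
        if (!ent)
                return mlx5_create_exact_mkey(dev, mr, in, inlen, ndescs);

        err = mlx5_ent_get_mkey(ent, &mr->mmkey.key);
        if (err == -ENOENT)
                err = mlx5_create_pool_mkey(ent, mr, in, inlen);
        if (err)
                return err;
        mr->cache_ent = ent;
        return 0;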

>
> +static int ent_insert(struct mlx5_mr_cache *cache, struct mlx5_cache_ent *ent)
> +{
> +        struct rb_node **new = &cache->cache_root.rb_node, *parent = NULL;
> +        int size = MLX5_ST_SZ_BYTES(mkc);
> +        struct mlx5_cache_ent *cur;
> +        int cmp;
> +
> +        /* Figure out where to put new node */
> +        while (*new) {
> +                cur = container_of(*new, struct mlx5_cache_ent, node);
> +                parent = *new;
> +                cmp = memcmp(ent->mkc, cur->mkc, size);
> +                if (cmp < 0)
> +                        new = &((*new)->rb_left);
> +                else if (cmp > 0)
> +                        new = &((*new)->rb_right);
> +                else
> +                        return -EEXIST;
> +        }
> +
> +        /* Add new node and rebalance tree. */
> +        rb_link_node(&ent->node, parent, new);
> +        rb_insert_color(&ent->node, &cache->cache_root);
> +
> +        return 0;
> +}

This should be placed next to the find function.

> +
> +static struct mlx5_cache_ent *mlx5_ib_create_cache_ent(struct mlx5_ib_dev *dev,
> +                                                       int order)
> +{
> +        struct mlx5_cache_ent *ent;
> +        int ret;
> +
> +        ent = kzalloc(sizeof(*ent), GFP_KERNEL);
> +        if (!ent)
> +                return ERR_PTR(-ENOMEM);
> +
> +        ent->mkc = kzalloc(MLX5_ST_SZ_BYTES(mkc), GFP_KERNEL);
> +        if (!ent->mkc) {
> +                kfree(ent);
> +                return ERR_PTR(-ENOMEM);
> +        }
> +
> +        ent->ndescs = 1 << order;
> +        mlx5_set_cache_mkc(dev, ent->mkc, 0, MLX5_MKC_ACCESS_MODE_MTT,
> +                           PAGE_SHIFT);
> +        MLX5_SET(mkc, ent->mkc, translations_octword_size,
> +                 get_mkc_octo_size(MLX5_MKC_ACCESS_MODE_MTT, ent->ndescs));
> +        mutex_lock(&dev->cache.cache_lock);
> +        ret = ent_insert(&dev->cache, ent);
> +        mutex_unlock(&dev->cache.cache_lock);
> +        if (ret) {
> +                kfree(ent->mkc);
> +                kfree(ent);
> +                return ERR_PTR(ret);
> +        }
> +
> +        xa_init_flags(&ent->mkeys, XA_FLAGS_ALLOC | XA_FLAGS_LOCK_IRQ);
> +        ent->dev = dev;
> +
> +        INIT_WORK(&ent->work, cache_work_func);
> +        INIT_DELAYED_WORK(&ent->dwork, delayed_cache_work_func);

And the ent should be fully set up before it is added to the cache.
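
Ie move the initialization above the insert, something like:

        xa_init_flags(&ent->mkeys, XA_FLAGS_ALLOC | XA_FLAGS_LOCK_IRQ);
        ent->dev = dev;
        INIT_WORK(&ent->work, cache_work_func);
        INIT_DELAYED_WORK(&ent->dwork, delayed_cache_work_func);

        mutex_lock(&dev->cache.cache_lock);
        ret = ent_insert(&dev->cache, ent);
        mutex_unlock(&dev->cache.cache_lock);
        if (ret) {
                kfree(ent->mkc);
                kfree(ent);
                return ERR_PTR(ret);
        }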

Jason