Re: [PATCH 1/2] iommu/iova: Make the rcache depot scale better

From: Robin Murphy
Date: Mon Aug 21 2023 - 08:28:46 EST


On 2023-08-21 13:02, John Garry wrote:
On 14/08/2023 18:53, Robin Murphy wrote:
The algorithm in the original paper specifies the storage of full
magazines in the depot as an unbounded list rather than a fixed-size
array. It turns out to be pretty straightforward to do this in our
implementation with no significant loss of efficiency. This allows
the depot to scale up to the working set sizes of larger systems,
while also potentially saving some memory on smaller ones too.

Signed-off-by: Robin Murphy <robin.murphy@xxxxxxx>

This looks ok (ignoring the crash reported), so feel free to add:

Reviewed-by: John Garry <john.g.garry@xxxxxxxxxx>

Thanks!

A small comment and question below.

---
  drivers/iommu/iova.c | 65 ++++++++++++++++++++++++--------------------
  1 file changed, 36 insertions(+), 29 deletions(-)

diff --git a/drivers/iommu/iova.c b/drivers/iommu/iova.c
index 10b964600948..d2de6fb0e9f4 100644
--- a/drivers/iommu/iova.c
+++ b/drivers/iommu/iova.c
@@ -625,10 +625,16 @@ EXPORT_SYMBOL_GPL(reserve_iova);
   * will be wasted.
   */
  #define IOVA_MAG_SIZE 127
-#define MAX_GLOBAL_MAGS 32    /* magazines per bin */
  struct iova_magazine {
-    unsigned long size;
+    /*
+     * Only full magazines are inserted into the depot, so we can avoid
+     * a separate list head and preserve maximum space-efficiency.

It might be worth explicitly mentioning that we try to keep the total magazine size a power of 2

Sure, I can tie it in with the existing comment above, which might actually end up more readable anyway.

+     */
+    union {
+        unsigned long size;
+        struct iova_magazine *next;
+    };
      unsigned long pfns[IOVA_MAG_SIZE];
  };
@@ -640,8 +646,7 @@ struct iova_cpu_rcache {
  struct iova_rcache {
      spinlock_t lock;
-    unsigned long depot_size;
-    struct iova_magazine *depot[MAX_GLOBAL_MAGS];
+    struct iova_magazine *depot;
      struct iova_cpu_rcache __percpu *cpu_rcaches;
  };
@@ -717,6 +722,21 @@ static void iova_magazine_push(struct iova_magazine *mag, unsigned long pfn)
      mag->pfns[mag->size++] = pfn;
  }
+/*
+ * Detach and return the first full magazine from the depot list, or NULL
+ * if the depot is empty. Caller must hold rcache->lock.
+ */
+static struct iova_magazine *iova_depot_pop(struct iova_rcache *rcache)
+{
+    struct iova_magazine *mag = rcache->depot;
+
+    /*
+     * Tolerate an empty depot: the teardown paths drain it with
+     * "while ((mag = iova_depot_pop(rcache)))" and expect NULL rather
+     * than a dereference of a NULL rcache->depot.
+     */
+    if (!mag)
+        return NULL;
+
+    rcache->depot = mag->next;
+    /* ->next overlays ->size in the union; restore it for the consumer */
+    mag->size = IOVA_MAG_SIZE;
+    return mag;
+}
+
+/*
+ * Prepend a full magazine to the depot's singly-linked list, reusing the
+ * magazine's size/next union as the link. Caller must hold rcache->lock.
+ */
+static void iova_depot_push(struct iova_rcache *rcache, struct iova_magazine *mag)
+{
+    mag->next = rcache->depot;
+    rcache->depot = mag;
+}
+
  int iova_domain_init_rcaches(struct iova_domain *iovad)
  {
      unsigned int cpu;
@@ -734,7 +754,6 @@ int iova_domain_init_rcaches(struct iova_domain *iovad)
          rcache = &iovad->rcaches[i];
          spin_lock_init(&rcache->lock);
-        rcache->depot_size = 0;
          rcache->cpu_rcaches = __alloc_percpu(sizeof(*cpu_rcache),
                               cache_line_size());
          if (!rcache->cpu_rcaches) {
@@ -776,7 +795,6 @@ static bool __iova_rcache_insert(struct iova_domain *iovad,
                   struct iova_rcache *rcache,
                   unsigned long iova_pfn)
  {
-    struct iova_magazine *mag_to_free = NULL;
      struct iova_cpu_rcache *cpu_rcache;
      bool can_insert = false;
      unsigned long flags;
@@ -794,12 +812,7 @@ static bool __iova_rcache_insert(struct iova_domain *iovad,
          if (new_mag) {
              spin_lock(&rcache->lock);
-            if (rcache->depot_size < MAX_GLOBAL_MAGS) {
-                rcache->depot[rcache->depot_size++] =
-                        cpu_rcache->loaded;
-            } else {
-                mag_to_free = cpu_rcache->loaded;
-            }
+            iova_depot_push(rcache, cpu_rcache->loaded);
              spin_unlock(&rcache->lock);

Out of curiosity, do you know why we take the approach (prior to this change) of freeing the loaded mag and allocating a new empty one? Why not instead just say that we can't insert and bail out?

I have a feeling it must have been mentioned at some point, since my memory says there was a deliberate intent to keep the flow through the critical section simple and consistent, and minimise time spent holding the rcache lock, and I'm 99% sure that isn't my own inferred reasoning...

Cheers,
Robin.

              cpu_rcache->loaded = new_mag;
@@ -812,11 +825,6 @@ static bool __iova_rcache_insert(struct iova_domain *iovad,
      spin_unlock_irqrestore(&cpu_rcache->lock, flags);
-    if (mag_to_free) {
-        iova_magazine_free_pfns(mag_to_free, iovad);
-        iova_magazine_free(mag_to_free);
-    }
-
      return can_insert;
  }
@@ -854,9 +862,9 @@ static unsigned long __iova_rcache_get(struct iova_rcache *rcache,
          has_pfn = true;
      } else {
          spin_lock(&rcache->lock);
-        if (rcache->depot_size > 0) {
+        if (rcache->depot) {
              iova_magazine_free(cpu_rcache->loaded);
-            cpu_rcache->loaded = rcache->depot[--rcache->depot_size];
+            cpu_rcache->loaded = iova_depot_pop(rcache);
              has_pfn = true;
          }
          spin_unlock(&rcache->lock);
@@ -894,10 +902,10 @@ static void free_iova_rcaches(struct iova_domain *iovad)
  {
      struct iova_rcache *rcache;
      struct iova_cpu_rcache *cpu_rcache;
+    struct iova_magazine *mag;
      unsigned int cpu;
-    int i, j;
-    for (i = 0; i < IOVA_RANGE_CACHE_MAX_SIZE; ++i) {
+    for (int i = 0; i < IOVA_RANGE_CACHE_MAX_SIZE; ++i) {
          rcache = &iovad->rcaches[i];
          if (!rcache->cpu_rcaches)
              break;
@@ -907,8 +915,8 @@ static void free_iova_rcaches(struct iova_domain *iovad)
              iova_magazine_free(cpu_rcache->prev);
          }
          free_percpu(rcache->cpu_rcaches);
-        for (j = 0; j < rcache->depot_size; ++j)
-            iova_magazine_free(rcache->depot[j]);
+        while ((mag = iova_depot_pop(rcache)))
+            iova_magazine_free(mag);
      }
      kfree(iovad->rcaches);
@@ -941,17 +949,16 @@ static void free_cpu_cached_iovas(unsigned int cpu, struct iova_domain *iovad)
  static void free_global_cached_iovas(struct iova_domain *iovad)
  {
      struct iova_rcache *rcache;
+    struct iova_magazine *mag;
      unsigned long flags;
-    int i, j;
-    for (i = 0; i < IOVA_RANGE_CACHE_MAX_SIZE; ++i) {
+    for (int i = 0; i < IOVA_RANGE_CACHE_MAX_SIZE; ++i) {
          rcache = &iovad->rcaches[i];
          spin_lock_irqsave(&rcache->lock, flags);
-        for (j = 0; j < rcache->depot_size; ++j) {
-            iova_magazine_free_pfns(rcache->depot[j], iovad);
-            iova_magazine_free(rcache->depot[j]);
+        while ((mag = iova_depot_pop(rcache))) {
+            iova_magazine_free_pfns(mag, iovad);
+            iova_magazine_free(mag);
          }
-        rcache->depot_size = 0;
          spin_unlock_irqrestore(&rcache->lock, flags);
      }
  }