Re: [PATCH v4 09/10] iommu: observability of the IOMMU allocations

From: Robin Murphy
Date: Fri Feb 09 2024 - 06:18:09 EST


On 2024-02-07 5:41 pm, Pasha Tatashin wrote:
Add NR_IOMMU_PAGES into node_stat_item that counts number of pages
that are allocated by the IOMMU subsystem.

The allocations can be view per-node via:
/sys/devices/system/node/nodeN/vmstat.

For example:

$ grep iommu /sys/devices/system/node/node*/vmstat
/sys/devices/system/node/node0/vmstat:nr_iommu_pages 106025
/sys/devices/system/node/node1/vmstat:nr_iommu_pages 3464

The value is in page-count, therefore, in the above example
the iommu allocations amount to ~428M.

Signed-off-by: Pasha Tatashin <pasha.tatashin@xxxxxxxxxx>
Acked-by: David Rientjes <rientjes@xxxxxxxxxx>
Tested-by: Bagas Sanjaya <bagasdotme@xxxxxxxxx>
---
drivers/iommu/iommu-pages.h | 30 ++++++++++++++++++++++++++++++
include/linux/mmzone.h | 3 +++
mm/vmstat.c | 3 +++
3 files changed, 36 insertions(+)

diff --git a/drivers/iommu/iommu-pages.h b/drivers/iommu/iommu-pages.h
index c412d0aaa399..7336f976b641 100644
--- a/drivers/iommu/iommu-pages.h
+++ b/drivers/iommu/iommu-pages.h
@@ -17,6 +17,30 @@
* state can be rather large, i.e. multiple gigabytes in size.
*/
+/**
+ * __iommu_alloc_account - account for newly allocated page.
+ * @page: head struct page of the page.
+ * @order: order of the page
+ */
+static inline void __iommu_alloc_account(struct page *page, int order)
+{
+ const long pgcnt = 1l << order;
+
+ mod_node_page_state(page_pgdat(page), NR_IOMMU_PAGES, pgcnt);
+}
+
+/**
+ * __iommu_free_account - account a page that is about to be freed.
+ * @page: head struct page of the page.
+ * @order: order of the page
+ */
+static inline void __iommu_free_account(struct page *page, int order)
+{
+ const long pgcnt = 1l << order;
+
+ mod_node_page_state(page_pgdat(page), NR_IOMMU_PAGES, -pgcnt);
+}
+
/**
* __iommu_alloc_pages_node - allocate a zeroed page of a given order from
* specific NUMA node.
@@ -35,6 +59,8 @@ static inline struct page *__iommu_alloc_pages_node(int nid, gfp_t gfp,
if (unlikely(!page))
return NULL;
+ __iommu_alloc_account(page, order);
+
return page;
}
@@ -53,6 +79,8 @@ static inline struct page *__iommu_alloc_pages(gfp_t gfp, int order)
if (unlikely(!page))
return NULL;
+ __iommu_alloc_account(page, order);
+
return page;
}
@@ -89,6 +117,7 @@ static inline void __iommu_free_pages(struct page *page, int order)
if (!page)
return;
+ __iommu_free_account(page, order);
__free_pages(page, order);
}
@@ -197,6 +226,7 @@ static inline void iommu_free_pages_list(struct list_head *page)
struct page *p = list_entry(page->prev, struct page, lru);
list_del(&p->lru);
+ __iommu_free_account(p, 0);

I'm keen to revive my patches to hook up freelist support in io-pgtable-arm, which would then mean a chance of higher-order GFP_COMP allocations coming back though this path - do you have any pointers for what I'd have to do here to make it work properly?

Thanks,
Robin.

put_page(p);
}
}
diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
index a497f189d988..bb6bc504915a 100644
--- a/include/linux/mmzone.h
+++ b/include/linux/mmzone.h
@@ -203,6 +203,9 @@ enum node_stat_item {
#endif
NR_PAGETABLE, /* used for pagetables */
NR_SECONDARY_PAGETABLE, /* secondary pagetables, e.g. KVM pagetables */
+#ifdef CONFIG_IOMMU_SUPPORT
+ NR_IOMMU_PAGES, /* # of pages allocated by IOMMU */
+#endif
#ifdef CONFIG_SWAP
NR_SWAPCACHE,
#endif
diff --git a/mm/vmstat.c b/mm/vmstat.c
index db79935e4a54..8507c497218b 100644
--- a/mm/vmstat.c
+++ b/mm/vmstat.c
@@ -1242,6 +1242,9 @@ const char * const vmstat_text[] = {
#endif
"nr_page_table_pages",
"nr_sec_page_table_pages",
+#ifdef CONFIG_IOMMU_SUPPORT
+ "nr_iommu_pages",
+#endif
#ifdef CONFIG_SWAP
"nr_swapcached",
#endif