[PATCH 4/5] iommu/vt-d: debugfs: Support dumping a specified page table

From: Jingqi Liu
Date: Sun Jun 25 2023 - 11:18:33 EST


The original debugfs code only dumps page tables that are not associated
with a PASID. With PASID support, the page tables associated with PASIDs
also need to be dumped.

Add support for dumping the page table of a specified device (optionally
with a PASID), or all page tables, in both legacy mode and scalable mode.

For legacy mode, use the bus number and DEVFN to walk the root table and
context table, get the page table pointer from the context table entry,
and then dump the page table.

For scalable mode, use the bus number, DEVFN and PASID to walk the root
table, context table, PASID directory and PASID table, get the page table
pointer from the PASID table entry, and then dump it.

Examples are as follows:
1) Dump the page table of device "00:1f.0" that only supports legacy mode.
$ echo 00:1f.0 | sudo tee \
/sys/kernel/debug/iommu/intel/domain_translation_struct
$ sudo cat /sys/kernel/debug/iommu/intel/domain_translation_struct

2) Dump the page table of device "00:0a.0" with PASID "1".
$ echo 00:0a.0,1 | sudo tee \
/sys/kernel/debug/iommu/intel/domain_translation_struct
$ sudo cat /sys/kernel/debug/iommu/intel/domain_translation_struct

3) Dump all page tables.
$ echo "auto" | sudo tee \
/sys/kernel/debug/iommu/intel/domain_translation_struct
$ sudo cat /sys/kernel/debug/iommu/intel/domain_translation_struct

Signed-off-by: Jingqi Liu <Jingqi.liu@xxxxxxxxx>
---
drivers/iommu/intel/debugfs.c | 191 ++++++++++++++++++++++++++++------
1 file changed, 159 insertions(+), 32 deletions(-)

diff --git a/drivers/iommu/intel/debugfs.c b/drivers/iommu/intel/debugfs.c
index 212d33598de9..e4d3b7836076 100644
--- a/drivers/iommu/intel/debugfs.c
+++ b/drivers/iommu/intel/debugfs.c
@@ -404,56 +404,183 @@ static void dump_translation_page_table(struct seq_file *m)
return;
}

-static int __show_device_domain_translation(struct device *dev, void *data)
+/*
+ * Dump the page table with the specified device and pasid.
+ * For legacy mode, search root and context tables to find
+ * the page table.
+ * For scalable mode, search root, context, pasid directory
+ * and pasid tables to find the page table.
+ * If no device is specified, traverse all devices and
+ * pasid tables, and then dump all page tables.
+ */
+static int show_device_domain_translation(struct show_domain_info *sinfo,
+ void *data)
{
- struct dmar_domain *domain;
+ bool walk_tbl = false, found = false;
+ u16 s_devfn = 0, e_devfn = 255, devfn;
+ u16 s_bus = 0, e_bus = 255, bus, seg;
+ struct dmar_drhd_unit *drhd;
+ struct intel_iommu *iommu;
struct seq_file *m = data;
- u64 path[6] = { 0 };
+ bool scalable;

- domain = to_dmar_domain(iommu_get_domain_for_dev(dev));
- if (!domain)
- return 0;
+ if (sinfo && sinfo->pdev) {
+ s_bus = sinfo->pdev->bus->number;
+ e_bus = sinfo->pdev->bus->number;
+ s_devfn = sinfo->pdev->devfn;
+ e_devfn = sinfo->pdev->devfn;
+ seg = pci_domain_nr(sinfo->pdev->bus);
+ } else
+ walk_tbl = true;

- seq_printf(m, "Device %s @0x%llx\n", dev_name(dev),
- (u64)virt_to_phys(domain->pgd));
- seq_puts(m, "IOVA_PFN\t\tPML5E\t\t\tPML4E\t\t\tPDPE\t\t\tPDE\t\t\tPTE\n");
-
- pgtable_walk_level(m, domain->pgd, domain->agaw + 2, 0, path);
- seq_putc(m, '\n');
+ rcu_read_lock();
+ for_each_active_iommu(iommu, drhd) {
+ struct context_entry *context;
+ u64 pgd, path[6] = { 0 };
+ u32 sts, agaw;

- /* Don't iterate */
- return 1;
-}
+ if (sinfo && sinfo->pdev && (seg != iommu->segment))
+ continue;

-static int show_device_domain_translation(struct device *dev, void *data)
-{
- struct iommu_group *group;
+ sts = dmar_readl(iommu->reg + DMAR_GSTS_REG);
+ if (!(sts & DMA_GSTS_TES)) {
+ seq_printf(m, "DMA Remapping is not enabled on %s\n",
+ iommu->name);
+ continue;
+ }
+ if (dmar_readq(iommu->reg + DMAR_RTADDR_REG) & DMA_RTADDR_SMT)
+ scalable = true;
+ else
+ scalable = false;

- group = iommu_group_get(dev);
- if (group) {
/*
- * The group->mutex is held across the callback, which will
- * block calls to iommu_attach/detach_group/device. Hence,
+ * The iommu->lock is held across the callback, which will
+ * block calls to domain_attach/domain_detach. Hence,
* the domain of the device will not change during traversal.
*
- * All devices in an iommu group share a single domain, hence
- * we only dump the domain of the first device. Even though,
- * this code still possibly races with the iommu_unmap()
- * interface. This could be solved by RCU-freeing the page
- * table pages in the iommu_unmap() path.
+ * Traversing page table possibly races with the iommu_unmap()
+ * interface. This could be solved by incrementing the
+ * reference count of page table page before traversal and
+ * decrementing the reference count after traversal.
*/
- iommu_group_for_each_dev(group, data,
- __show_device_domain_translation);
- iommu_group_put(group);
+ spin_lock(&iommu->lock);
+ for (bus = s_bus; bus <= e_bus; bus++) {
+ for (devfn = s_devfn; devfn <= e_devfn; devfn++) {
+ context = iommu_context_addr(iommu, bus, devfn, 0);
+ if (!context || !context_present(context))
+ continue;
+
+ if (!scalable) { /* legacy mode */
+ pgd = context->lo & VTD_PAGE_MASK;
+ agaw = context->hi & 7;
+
+ seq_printf(m, "Device %04x:%02x:%02x.%x @0x%llx\n",
+ iommu->segment, bus, PCI_SLOT(devfn), PCI_FUNC(devfn), pgd);
+ seq_printf(m, "%-17s\t%-18s\t%-18s\t%-18s\t%-18s\t%-s\n",
+ "IOVA_PFN", "PML5E", "PML4E", "PDPE", "PDE", "PTE");
+ pgtable_walk_level(m, phys_to_virt(pgd), agaw + 2, 0, path);
+ seq_putc(m, '\n');
+
+ found = true;
+ } else { /* scalable mode */
+ struct tbl_walk tbl_wlk = {0};
+ struct pasid_dir_entry *dir_tbl, *dir_entry;
+ struct pasid_entry *pasid_tbl, *pasid_tbl_entry;
+ u16 pasid_dir_size, dir_idx, tbl_idx;
+ u64 pasid_dir_ptr;
+
+ tbl_wlk.segment = iommu->segment;
+ tbl_wlk.bus = bus;
+ tbl_wlk.devfn = devfn;
+ tbl_wlk.rt_entry = &iommu->root_entry[bus];
+ tbl_wlk.ctx_entry = context;
+ tbl_wlk.dump_page_table = true;
+ m->private = &tbl_wlk;
+
+ pasid_dir_ptr = context->lo & VTD_PAGE_MASK;
+ pasid_dir_size = get_pasid_dir_size(context);
+
+ if (walk_tbl) {
+ pasid_dir_walk(m, pasid_dir_ptr, pasid_dir_size);
+ continue;
+ }
+
+ if (sinfo && sinfo->pasid == INVALID_IOASID) {
+ spin_unlock(&iommu->lock);
+ goto unlock_out;
+ }
+
+ /* Dump specified device domain mappings with PASID. */
+ dir_idx = sinfo->pasid >> PASID_PDE_SHIFT;
+ tbl_idx = sinfo->pasid & PASID_PTE_MASK;
+
+ dir_tbl = phys_to_virt(pasid_dir_ptr);
+ dir_entry = &dir_tbl[dir_idx];
+
+ pasid_tbl = get_pasid_table_from_pde(dir_entry);
+ if (!pasid_tbl)
+ continue;
+
+ pasid_tbl_entry = &pasid_tbl[tbl_idx];
+ if (!pasid_pte_is_present(pasid_tbl_entry))
+ continue;
+
+ tbl_wlk.pasid = sinfo->pasid;
+ tbl_wlk.pasid_tbl_entry = pasid_tbl_entry;
+ dump_translation_page_table(m);
+
+ found = true;
+ }
+ }
+ }
+
+ spin_unlock(&iommu->lock);
+ if (!walk_tbl && found)
+ break;
}

+unlock_out:
+ rcu_read_unlock();
+
+ if (!walk_tbl && !found && (sinfo->pasid != INVALID_IOASID))
+ seq_printf(m, "No mappings found on device %s with pasid %x.\n",
+ dev_name(&sinfo->pdev->dev), sinfo->pasid);
return 0;
}

static int domain_translation_struct_show(struct seq_file *m, void *unused)
{
- return bus_for_each_dev(&pci_bus_type, NULL, m,
- show_device_domain_translation);
+ int ret;
+
+ if (show_domain_info && show_domain_info->pdev) {
+ struct device_domain_info *info =
+ dev_iommu_priv_get(&show_domain_info->pdev->dev);
+
+ if (info) {
+ /*
+ * The previous domain has already been detached; the
+ * device will switch to the default domain next.
+ */
+ if (!info->domain)
+ return 0;
+
+ if (info->pasid_enabled &&
+ (show_domain_info->pasid == INVALID_IOASID))
+ show_domain_info->pasid = PASID_RID2PASID;
+ else if (!info->pasid_enabled &&
+ (show_domain_info->pasid != INVALID_IOASID)) {
+ seq_printf(m, "Device %s does not support PASID.\n",
+ dev_name(&show_domain_info->pdev->dev));
+ return 0;
+ }
+ } else
+ show_domain_info->pasid = PASID_RID2PASID;
+
+ ret = show_device_domain_translation(show_domain_info, m);
+ } else
+ ret = show_device_domain_translation(NULL, m);
+
+ return ret;
}

static ssize_t domain_translation_struct_write(struct file *filp,
--
2.21.3