[RFC PATCH v2 3/9] iommu/arm-smmu-v3: Issue invalidation commands to multiple SMMUs

From: Michael Shavit
Date: Tue Aug 22 2023 - 06:58:11 EST


Assume that devices in the smmu_domain->devices list that belong to the
same SMMU are adjacent to each other in the list.
Group the TLB/ATC invalidation commands for an smmu_domain by SMMU, so
that they are issued to each SMMU device that the domain is installed on.

Signed-off-by: Michael Shavit <mshavit@xxxxxxxxxx>
---

Changes in v2:
- Moved the ARM_SMMU_FEAT_BTM changes into a new preparatory commit

.../iommu/arm/arm-smmu-v3/arm-smmu-v3-sva.c | 6 +-
drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c | 134 +++++++++++++-----
drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h | 2 +-
3 files changed, 104 insertions(+), 38 deletions(-)

diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-sva.c b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-sva.c
index 53f65a89a55f9..fe88a7880ad57 100644
--- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-sva.c
+++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-sva.c
@@ -112,7 +112,7 @@ arm_smmu_share_asid(struct mm_struct *mm, u16 asid)
arm_smmu_write_ctx_desc_devices(smmu_domain, 0, cd);

/* Invalidate TLB entries previously associated with that context */
- arm_smmu_tlb_inv_asid(smmu, asid);
+ arm_smmu_tlb_inv_asid(smmu_domain, asid);

xa_erase(&arm_smmu_asid_xa, asid);
return NULL;
@@ -252,7 +252,7 @@ static void arm_smmu_mm_release(struct mmu_notifier *mn, struct mm_struct *mm)
*/
arm_smmu_write_ctx_desc_devices(smmu_domain, mm->pasid, &quiet_cd);

- arm_smmu_tlb_inv_asid(smmu_domain->smmu, smmu_mn->cd->asid);
+ arm_smmu_tlb_inv_asid(smmu_domain, smmu_mn->cd->asid);
arm_smmu_atc_inv_domain(smmu_domain, mm->pasid, 0, 0);

smmu_mn->cleared = true;
@@ -340,7 +340,7 @@ static void arm_smmu_mmu_notifier_put(struct arm_smmu_mmu_notifier *smmu_mn)
* new TLB entry can have been formed.
*/
if (!smmu_mn->cleared) {
- arm_smmu_tlb_inv_asid(smmu_domain->smmu, cd->asid);
+ arm_smmu_tlb_inv_asid(smmu_domain, cd->asid);
arm_smmu_atc_inv_domain(smmu_domain, mm->pasid, 0, 0);
}

diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
index db4df9d6aef10..1d072fd38a2d6 100644
--- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
+++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
@@ -960,15 +960,28 @@ static int arm_smmu_page_response(struct device *dev,
}

/* Context descriptor manipulation functions */
-void arm_smmu_tlb_inv_asid(struct arm_smmu_device *smmu, u16 asid)
+void arm_smmu_tlb_inv_asid(struct arm_smmu_domain *smmu_domain, u16 asid)
{
+ struct arm_smmu_device *smmu = NULL;
+ struct arm_smmu_master *master;
struct arm_smmu_cmdq_ent cmd = {
- .opcode = smmu->features & ARM_SMMU_FEAT_E2H ?
- CMDQ_OP_TLBI_EL2_ASID : CMDQ_OP_TLBI_NH_ASID,
.tlbi.asid = asid,
};
+ unsigned long flags;

- arm_smmu_cmdq_issue_cmd_with_sync(smmu, &cmd);
+ spin_lock_irqsave(&smmu_domain->devices_lock, flags);
+ list_for_each_entry(master, &smmu_domain->devices,
+ domain_head) {
+ if (!smmu)
+ smmu = master->smmu;
+ if (smmu != master->smmu ||
+ list_is_last(&master->domain_head, &smmu_domain->devices)) {
+ cmd.opcode = smmu->features & ARM_SMMU_FEAT_E2H ?
+ CMDQ_OP_TLBI_EL2_ASID : CMDQ_OP_TLBI_NH_ASID,
+ arm_smmu_cmdq_issue_cmd_with_sync(smmu, &cmd);
+ }
+ }
+ spin_unlock_irqrestore(&smmu_domain->devices_lock, flags);
}

static void arm_smmu_sync_cd(struct arm_smmu_master *master,
@@ -1811,14 +1824,13 @@ int arm_smmu_atc_inv_domain(struct arm_smmu_domain *smmu_domain, int ssid,
unsigned long iova, size_t size)
{
int i;
+ int ret = 0;
unsigned long flags;
struct arm_smmu_cmdq_ent cmd;
+ struct arm_smmu_device *smmu = NULL;
struct arm_smmu_master *master;
struct arm_smmu_cmdq_batch cmds;

- if (!(smmu_domain->smmu->features & ARM_SMMU_FEAT_ATS))
- return 0;
-
/*
* Ensure that we've completed prior invalidation of the main TLBs
* before we read 'nr_ats_masters' in case of a concurrent call to
@@ -1839,28 +1851,56 @@ int arm_smmu_atc_inv_domain(struct arm_smmu_domain *smmu_domain, int ssid,
arm_smmu_atc_inv_to_cmd(ssid, iova, size, &cmd);

cmds.num = 0;
-
spin_lock_irqsave(&smmu_domain->devices_lock, flags);
list_for_each_entry(master, &smmu_domain->devices, domain_head) {
if (!master->ats_enabled)
continue;
+ if (!smmu)
+ smmu = master->smmu;
+ if (smmu != master->smmu ||
+ list_is_last(&master->domain_head, &smmu_domain->devices)) {
+ ret = arm_smmu_cmdq_batch_submit(smmu, &cmds);
+ if (ret)
+ break;
+ cmds.num = 0;
+ }

for (i = 0; i < master->num_streams; i++) {
cmd.atc.sid = master->streams[i].id;
- arm_smmu_cmdq_batch_add(smmu_domain->smmu, &cmds, &cmd);
+ arm_smmu_cmdq_batch_add(smmu, &cmds, &cmd);
}
}
spin_unlock_irqrestore(&smmu_domain->devices_lock, flags);

- return arm_smmu_cmdq_batch_submit(smmu_domain->smmu, &cmds);
+ return ret;
+}
+
+static void arm_smmu_tlb_inv_vmid(struct arm_smmu_domain *smmu_domain)
+{
+ struct arm_smmu_device *smmu = NULL;
+ struct arm_smmu_master *master;
+ struct arm_smmu_cmdq_ent cmd = {
+ .opcode = CMDQ_OP_TLBI_S12_VMALL,
+ .tlbi.vmid = smmu_domain->s2_cfg.vmid,
+ };
+ unsigned long flags;
+
+ spin_lock_irqsave(&smmu_domain->devices_lock, flags);
+ list_for_each_entry(master, &smmu_domain->devices,
+ domain_head) {
+ if (!smmu)
+ smmu = master->smmu;
+ if (smmu != master->smmu ||
+ list_is_last(&master->domain_head, &smmu_domain->devices))
+ arm_smmu_cmdq_issue_cmd_with_sync(smmu, &cmd);
+ }
+ spin_unlock_irqrestore(&smmu_domain->devices_lock, flags);
}

/* IO_PGTABLE API */
static void arm_smmu_tlb_inv_context(void *cookie)
{
struct arm_smmu_domain *smmu_domain = cookie;
- struct arm_smmu_device *smmu = smmu_domain->smmu;
- struct arm_smmu_cmdq_ent cmd;

/*
* NOTE: when io-pgtable is in non-strict mode, we may get here with
@@ -1870,11 +1910,9 @@ static void arm_smmu_tlb_inv_context(void *cookie)
* careful, 007.
*/
if (smmu_domain->stage == ARM_SMMU_DOMAIN_S1) {
- arm_smmu_tlb_inv_asid(smmu, smmu_domain->cd.asid);
+ arm_smmu_tlb_inv_asid(smmu_domain, smmu_domain->cd.asid);
} else {
- cmd.opcode = CMDQ_OP_TLBI_S12_VMALL;
- cmd.tlbi.vmid = smmu_domain->s2_cfg.vmid;
- arm_smmu_cmdq_issue_cmd_with_sync(smmu, &cmd);
+ arm_smmu_tlb_inv_vmid(smmu_domain);
}
arm_smmu_atc_inv_domain(smmu_domain, 0, 0, 0);
}
@@ -1882,9 +1920,9 @@ static void arm_smmu_tlb_inv_context(void *cookie)
static void __arm_smmu_tlb_inv_range(struct arm_smmu_cmdq_ent *cmd,
unsigned long iova, size_t size,
size_t granule,
- struct arm_smmu_domain *smmu_domain)
+ struct arm_smmu_domain *smmu_domain,
+ struct arm_smmu_device *smmu)
{
- struct arm_smmu_device *smmu = smmu_domain->smmu;
unsigned long end = iova + size, num_pages = 0, tg = 0;
size_t inv_range = granule;
struct arm_smmu_cmdq_batch cmds;
@@ -1949,21 +1987,36 @@ static void arm_smmu_tlb_inv_range_domain(unsigned long iova, size_t size,
size_t granule, bool leaf,
struct arm_smmu_domain *smmu_domain)
{
+ struct arm_smmu_device *smmu = NULL;
+ struct arm_smmu_master *master;
struct arm_smmu_cmdq_ent cmd = {
.tlbi = {
.leaf = leaf,
},
};
+ unsigned long flags;

- if (smmu_domain->stage == ARM_SMMU_DOMAIN_S1) {
- cmd.opcode = smmu_domain->smmu->features & ARM_SMMU_FEAT_E2H ?
- CMDQ_OP_TLBI_EL2_VA : CMDQ_OP_TLBI_NH_VA;
- cmd.tlbi.asid = smmu_domain->cd.asid;
- } else {
- cmd.opcode = CMDQ_OP_TLBI_S2_IPA;
- cmd.tlbi.vmid = smmu_domain->s2_cfg.vmid;
+ spin_lock_irqsave(&smmu_domain->devices_lock, flags);
+ list_for_each_entry(master, &smmu_domain->devices, domain_head) {
+ if (!smmu)
+ smmu = master->smmu;
+ if (smmu != master->smmu ||
+ list_is_last(&master->domain_head, &smmu_domain->devices)) {
+ if (smmu_domain->stage == ARM_SMMU_DOMAIN_S1) {
+ cmd.opcode = smmu->features &
+ ARM_SMMU_FEAT_E2H ?
+ CMDQ_OP_TLBI_EL2_VA :
+ CMDQ_OP_TLBI_NH_VA;
+ cmd.tlbi.asid = smmu_domain->cd.asid;
+ } else {
+ cmd.opcode = CMDQ_OP_TLBI_S2_IPA;
+ cmd.tlbi.vmid = smmu_domain->s2_cfg.vmid;
+ }
+ __arm_smmu_tlb_inv_range(&cmd, iova, size, granule,
+ smmu_domain, smmu);
+ }
}
- __arm_smmu_tlb_inv_range(&cmd, iova, size, granule, smmu_domain);
+ spin_unlock_irqrestore(&smmu_domain->devices_lock, flags);

/*
* Unfortunately, this can't be leaf-only since we may have
@@ -1977,19 +2030,33 @@ void arm_smmu_tlb_inv_range_asid(unsigned long iova, size_t size, int asid,
bool skip_btm_capable_devices,
struct arm_smmu_domain *smmu_domain)
{
+ struct arm_smmu_device *smmu = NULL;
+ struct arm_smmu_master *master;
struct arm_smmu_cmdq_ent cmd = {
- .opcode = smmu_domain->smmu->features & ARM_SMMU_FEAT_E2H ?
- CMDQ_OP_TLBI_EL2_VA : CMDQ_OP_TLBI_NH_VA,
.tlbi = {
.asid = asid,
.leaf = leaf,
},
};
+ unsigned long flags;

- if (skip_btm_capable_devices &&
- smmu_domain->smmu->features & ARM_SMMU_FEAT_BTM)
- return;
- __arm_smmu_tlb_inv_range(&cmd, iova, size, granule, smmu_domain);
+ spin_lock_irqsave(&smmu_domain->devices_lock, flags);
+ list_for_each_entry(master, &smmu_domain->devices, domain_head) {
+ if (!smmu)
+ smmu = master->smmu;
+ if (smmu != master->smmu ||
+ list_is_last(&master->domain_head, &smmu_domain->devices)) {
+ if (skip_btm_capable_devices &&
+ smmu->features & ARM_SMMU_FEAT_BTM)
+ continue;
+ cmd.opcode = smmu->features & ARM_SMMU_FEAT_E2H ?
+ CMDQ_OP_TLBI_EL2_VA :
+ CMDQ_OP_TLBI_NH_VA;
+ __arm_smmu_tlb_inv_range(&cmd, iova, size, granule,
+ smmu_domain, smmu);
+ }
+ }
+ spin_unlock_irqrestore(&smmu_domain->devices_lock, flags);
}

static void arm_smmu_tlb_inv_page_nosync(struct iommu_iotlb_gather *gather,
@@ -2523,8 +2590,7 @@ static void arm_smmu_flush_iotlb_all(struct iommu_domain *domain)
{
struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);

- if (smmu_domain->smmu)
- arm_smmu_tlb_inv_context(smmu_domain);
+ arm_smmu_tlb_inv_context(smmu_domain);
}

static void arm_smmu_iotlb_sync(struct iommu_domain *domain,
diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h
index 05599914eb0a0..b0cf9c33e6bcd 100644
--- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h
+++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h
@@ -748,7 +748,7 @@ extern struct arm_smmu_ctx_desc quiet_cd;

int arm_smmu_write_ctx_desc(struct arm_smmu_master *smmu_master, int ssid,
struct arm_smmu_ctx_desc *cd);
-void arm_smmu_tlb_inv_asid(struct arm_smmu_device *smmu, u16 asid);
+void arm_smmu_tlb_inv_asid(struct arm_smmu_domain *smmu_domain, u16 asid);
void arm_smmu_tlb_inv_range_asid(unsigned long iova, size_t size, int asid,
size_t granule, bool leaf,
bool skip_btm_capable_devices,
--
2.42.0.rc1.204.g551eb34607-goog