Re: [PATCH v4 2/2] iommu/arm-smmu-v3: Refactor arm_smmu_write_strtab_ent()

From: Jason Gunthorpe
Date: Mon Sep 25 2023 - 14:36:41 EST


On Wed, Sep 20, 2023 at 01:52:04PM -0700, Nicolin Chen wrote:
>
> +static void arm_smmu_ste_stage2_translate(struct arm_smmu_master *master,
> + u64 *ste)
> +{
> + struct arm_smmu_domain *smmu_domain = master->domain;
> + struct arm_smmu_device *smmu = master->smmu;
> + struct arm_smmu_s2_cfg *s2_cfg;
> +
> + switch (smmu_domain->stage) {
> + case ARM_SMMU_DOMAIN_NESTED:
> + case ARM_SMMU_DOMAIN_S2:
> + s2_cfg = &smmu_domain->s2_cfg;
> + break;
> + default:
> + WARN_ON(1);
> + return;
> + }
> +
> + ste[0] |= FIELD_PREP(STRTAB_STE_0_CFG, STRTAB_STE_0_CFG_S2_TRANS);
> +
> + if (smmu->features & ARM_SMMU_FEAT_STALLS && !master->stall_enabled)
> + ste[1] |= STRTAB_STE_1_S1STALLD;
> +
> + if (master->ats_enabled)
> + ste[1] |= FIELD_PREP(STRTAB_STE_1_EATS, STRTAB_STE_1_EATS_TRANS);

These master bits probably belong in their own function 'setup ste for master'

The s1 and s2 cases are duplicating these things.

> +
> + ste[2] |= FIELD_PREP(STRTAB_STE_2_S2VMID, s2_cfg->vmid) |
> + FIELD_PREP(STRTAB_STE_2_VTCR, s2_cfg->vtcr) |
> +#ifdef __BIG_ENDIAN
> + STRTAB_STE_2_S2ENDI |
> +#endif
> + STRTAB_STE_2_S2PTW | STRTAB_STE_2_S2AA64 | STRTAB_STE_2_S2R;
> +
> + ste[3] |= s2_cfg->vttbr & STRTAB_STE_3_S2TTB_MASK;
> +}
> +
> +static void arm_smmu_ste_stage1_translate(struct arm_smmu_master *master,
> + u64 *ste)
> +{

Lets stop calling the cdtable 'stage 1' please, it is confusing.

arm_smmu_ste_cdtable()

> + struct arm_smmu_ctx_desc_cfg *cd_table = &master->cd_table;
> + struct arm_smmu_device *smmu = master->smmu;
> + __le64 *cdptr = arm_smmu_get_cd_ptr(master, 0);
> +
> + WARN_ON_ONCE(!cdptr);
> +
> + ste[0] |= (cd_table->cdtab_dma & STRTAB_STE_0_S1CTXPTR_MASK) |
> + FIELD_PREP(STRTAB_STE_0_CFG, STRTAB_STE_0_CFG_S1_TRANS) |
> + FIELD_PREP(STRTAB_STE_0_S1CDMAX, cd_table->s1cdmax) |
> + FIELD_PREP(STRTAB_STE_0_S1FMT, cd_table->s1fmt);
> +
> + if (FIELD_GET(CTXDESC_CD_0_ASID, le64_to_cpu(cdptr[0])))

Reading the CD like that seems like a hacky way to detect that the RID
domain is bypass, just do it directly:

if (master->domain->stage == ARM_SMMU_DOMAIN_BYPASS)
ste[1] |= FIELD_PREP(STRTAB_STE_1_S1DSS, STRTAB_STE_1_S1DSS_BYPASS);
else
ste[1] |= FIELD_PREP(STRTAB_STE_1_S1DSS, STRTAB_STE_1_S1DSS_SSID0);

> + ste[1] |= FIELD_PREP(STRTAB_STE_1_SHCFG, STRTAB_STE_1_SHCFG_INCOMING) |
> + FIELD_PREP(STRTAB_STE_1_S1CIR, STRTAB_STE_1_S1C_CACHE_WBRA) |
> + FIELD_PREP(STRTAB_STE_1_S1COR, STRTAB_STE_1_S1C_CACHE_WBRA) |
> + FIELD_PREP(STRTAB_STE_1_S1CSH, ARM_SMMU_SH_ISH);
> +
> + if (smmu->features & ARM_SMMU_FEAT_E2H)
> + ste[1] |= FIELD_PREP(STRTAB_STE_1_STRW, STRTAB_STE_1_STRW_EL2);
> + else
> + ste[1] |= FIELD_PREP(STRTAB_STE_1_STRW, STRTAB_STE_1_STRW_NSEL1);
> +
> + if (smmu->features & ARM_SMMU_FEAT_STALLS && !master->stall_enabled)
> + ste[1] |= STRTAB_STE_1_S1STALLD;
> +
> + if (master->ats_enabled)
> + ste[1] |= FIELD_PREP(STRTAB_STE_1_EATS, STRTAB_STE_1_EATS_TRANS);
> +
> + if (master->domain->stage == ARM_SMMU_DOMAIN_NESTED)
> + arm_smmu_ste_stage2_translate(master, ste);

I think this needs a comment

/*
* SMMUv3 does not support using a S2 domain and a CD table for anything
* other than nesting where the S2 is the translation for the CD
* table, and all associated S1s.
*/

> + if (le64_to_cpu(dst[0]) & STRTAB_STE_0_V) {
> + switch (FIELD_GET(STRTAB_STE_0_CFG, le64_to_cpu(dst[0]))) {
> case STRTAB_STE_0_CFG_BYPASS:
> break;
> case STRTAB_STE_0_CFG_S1_TRANS:

This thing could go into a function 'ste_is_live' too

> + ste[0] = STRTAB_STE_0_V;
>
> + if (master->cd_table.cdtab && master->domain) {

I think the weirdness in arm_smmu_detach_dev() causes trouble here?
Despite the name the function is some sort of preperation to
attach_dev.

So if we change attachments while a cdtab is active we should not
remove the cdtab.

Basically, no master->domain check..

IMHO, I still don't like how this is structured. We have
arm_smmu_detach_dev() which really just wants to invalidate the STE.
Now that you shifted some of the logic to functions this might be
better overall:

static void arm_smmu_store_ste(struct arm_smmu_master *master,
__le64 *dst, u64 *src)
{
bool ste_sync_all = false;

for (i = 1; i < 4; i++) {
if (dst[i] == cpu_to_le64(ste[i]))
continue;
dst[i] = cpu_to_le64(ste[i]);
ste_sync_all = true;
}

if (ste_sync_all)
arm_smmu_sync_ste_for_sid(smmu, sid);
/* See comment in arm_smmu_write_ctx_desc() */
WRITE_ONCE(dst[0], cpu_to_le64(ste[0]));
arm_smmu_sync_ste_for_sid(smmu, sid);
}

static void arm_smmu_clear_strtab_ent(struct arm_smmu_master *master,
__le64 *dst)
{
u64 ste[4] = {};

ste[0] = STRTAB_STE_0_V;
if (disable_bypass)
arm_smmu_ste_abort(ste);
else
arm_smmu_ste_bypass(ste);
arm_smmu_store_ste(master, dst, &ste);
}

And use clear_strtab_ent from detach ??

(but then I wonder why not set V=0 instead of STE.Config = abort?)

> + arm_smmu_ste_stage1_translate(master, ste);
> + } else if (master->domain &&
> + master->domain->stage == ARM_SMMU_DOMAIN_S2) {
> BUG_ON(ste_live);
> + arm_smmu_ste_stage2_translate(master, ste);

This whole bit looks nicer as one if

} else if (master->domain) {
if (master->domain->stage == ARM_SMMU_DOMAIN_S2)
arm_smmu_ste_stage2_translate(master, ste);
else if (master->domain->domain.type == IOMMU_DOMAIN_IDENTITY)
arm_smmu_ste_bypass(ste);
else
BUG_ON()
} else {
// Ugh, removing this case requires more work
}

(Linus will not like the bug_on's btw, the function really should
fail)

> + for (i = 1; i < 4; i++) {
> + if (dst[i] == cpu_to_le64(ste[i]))
> + continue;
> + dst[i] = cpu_to_le64(ste[i]);
> + ste_sync_all = true;
> + }

This isn't going to work if the transition is from a fully valid STE
to an invalid one, it will corrupt the still in-use bytes.

Though current code does this:

dst[0] = cpu_to_le64(val);
dst[1] = cpu_to_le64(FIELD_PREP(STRTAB_STE_1_SHCFG,
STRTAB_STE_1_SHCFG_INCOMING));
dst[2] = 0; /* Nuke the VMID */

Which I don't really understand either? Why is it OK to wipe the VMID
out of order with the STE.Config change?

Be sure to read the part of the SMMU spec talking about how to update
these things, 3.21.3.1 Configuration structure update procedure and
nearby.

Regardless there are clearly two orders in the existing code

Write 0,1,2,flush (translation -> bypass/fault)

Write 3,2,1,flush,0,flush (bypass/fault -> translation)

You still have to preserve both behaviors.

(interestingly neither seem to follow the guidance of the ARM manual,
so huh)

Still, I think this should be able to become more robust in general..
You have a current and target STE and you just need to figure out what
order to write the bits and if a V=0 transition is needed.

The bigger question is does this have to be more generic to handle
S1DSS which is it's original design goal?

Jason