Re: [PATCH v4 4/6] VT-d: add device IOTLB invalidation support

From: Grant Grundler
Date: Sun Mar 29 2009 - 01:20:03 EST


On Mon, Mar 23, 2009 at 03:59:00PM +0800, Yu Zhao wrote:
> Support device IOTLB invalidation to flush the translation cached
> in the Endpoint.
>
> Signed-off-by: Yu Zhao <yu.zhao@xxxxxxxxx>
> ---
> drivers/pci/dmar.c | 77 ++++++++++++++++++++++++++++++++++++++----
> include/linux/intel-iommu.h | 14 +++++++-
> 2 files changed, 82 insertions(+), 9 deletions(-)
>
> diff --git a/drivers/pci/dmar.c b/drivers/pci/dmar.c
> index 106bc45..494b167 100644
> --- a/drivers/pci/dmar.c
> +++ b/drivers/pci/dmar.c
> @@ -674,7 +674,8 @@ void free_iommu(struct intel_iommu *iommu)
> */
> static inline void reclaim_free_desc(struct q_inval *qi)
> {
> - while (qi->desc_status[qi->free_tail] == QI_DONE) {
> + while (qi->desc_status[qi->free_tail] == QI_DONE ||
> + qi->desc_status[qi->free_tail] == QI_ABORT) {
> qi->desc_status[qi->free_tail] = QI_FREE;
> qi->free_tail = (qi->free_tail + 1) % QI_LENGTH;
> qi->free_cnt++;
> @@ -684,10 +685,13 @@ static inline void reclaim_free_desc(struct q_inval *qi)
> static int qi_check_fault(struct intel_iommu *iommu, int index)
> {
> u32 fault;
> - int head;
> + int head, tail;
> struct q_inval *qi = iommu->qi;
> int wait_index = (index + 1) % QI_LENGTH;
>
> + if (qi->desc_status[wait_index] == QI_ABORT)
> + return -EAGAIN;
> +
> fault = readl(iommu->reg + DMAR_FSTS_REG);
>
> /*
> @@ -697,7 +701,11 @@ static int qi_check_fault(struct intel_iommu *iommu, int index)
> */
> if (fault & DMA_FSTS_IQE) {
> head = readl(iommu->reg + DMAR_IQH_REG);
> - if ((head >> 4) == index) {
> + if ((head >> DMAR_IQ_OFFSET) == index) {

Yu,
DMAR_IQ_OFFSET should probably be called DMAR_IQ_SHIFT, since it's used as a
shift count in the same way that "PAGE_SHIFT" is.

I've looked through the rest of the code and don't see any problems.
But I also don't have a clue what "ITE" (in IOMMU context) is. I'm assuming
it has something to do with translation errors but have no idea about
where/when those are generated and what the outcome is.

thanks,
grant

> + printk(KERN_ERR "VT-d detected invalid descriptor: "
> + "low=%llx, high=%llx\n",
> + (unsigned long long)qi->desc[index].low,
> + (unsigned long long)qi->desc[index].high);
> memcpy(&qi->desc[index], &qi->desc[wait_index],
> sizeof(struct qi_desc));
> __iommu_flush_cache(iommu, &qi->desc[index],
> @@ -707,6 +715,32 @@ static int qi_check_fault(struct intel_iommu *iommu, int index)
> }
> }
>
> + /*
> + * If ITE happens, all pending wait_desc commands are aborted.
> + * No new descriptors are fetched until the ITE is cleared.
> + */
> + if (fault & DMA_FSTS_ITE) {
> + head = readl(iommu->reg + DMAR_IQH_REG);
> + head = ((head >> DMAR_IQ_OFFSET) - 1 + QI_LENGTH) % QI_LENGTH;
> + head |= 1;
> + tail = readl(iommu->reg + DMAR_IQT_REG);
> + tail = ((tail >> DMAR_IQ_OFFSET) - 1 + QI_LENGTH) % QI_LENGTH;
> +
> + writel(DMA_FSTS_ITE, iommu->reg + DMAR_FSTS_REG);
> +
> + do {
> + if (qi->desc_status[head] == QI_IN_USE)
> + qi->desc_status[head] = QI_ABORT;
> + head = (head - 2 + QI_LENGTH) % QI_LENGTH;
> + } while (head != tail);
> +
> + if (qi->desc_status[wait_index] == QI_ABORT)
> + return -EAGAIN;
> + }
> +
> + if (fault & DMA_FSTS_ICE)
> + writel(DMA_FSTS_ICE, iommu->reg + DMAR_FSTS_REG);
> +
> return 0;
> }
>
> @@ -716,7 +750,7 @@ static int qi_check_fault(struct intel_iommu *iommu, int index)
> */
> int qi_submit_sync(struct qi_desc *desc, struct intel_iommu *iommu)
> {
> - int rc = 0;
> + int rc;
> struct q_inval *qi = iommu->qi;
> struct qi_desc *hw, wait_desc;
> int wait_index, index;
> @@ -727,6 +761,9 @@ int qi_submit_sync(struct qi_desc *desc, struct intel_iommu *iommu)
>
> hw = qi->desc;
>
> +restart:
> + rc = 0;
> +
> spin_lock_irqsave(&qi->q_lock, flags);
> while (qi->free_cnt < 3) {
> spin_unlock_irqrestore(&qi->q_lock, flags);
> @@ -757,7 +794,7 @@ int qi_submit_sync(struct qi_desc *desc, struct intel_iommu *iommu)
> * update the HW tail register indicating the presence of
> * new descriptors.
> */
> - writel(qi->free_head << 4, iommu->reg + DMAR_IQT_REG);
> + writel(qi->free_head << DMAR_IQ_OFFSET, iommu->reg + DMAR_IQT_REG);
>
> while (qi->desc_status[wait_index] != QI_DONE) {
> /*
> @@ -769,18 +806,21 @@ int qi_submit_sync(struct qi_desc *desc, struct intel_iommu *iommu)
> */
> rc = qi_check_fault(iommu, index);
> if (rc)
> - goto out;
> + break;
>
> spin_unlock(&qi->q_lock);
> cpu_relax();
> spin_lock(&qi->q_lock);
> }
> -out:
> - qi->desc_status[index] = qi->desc_status[wait_index] = QI_DONE;
> +
> + qi->desc_status[index] = QI_DONE;
>
> reclaim_free_desc(qi);
> spin_unlock_irqrestore(&qi->q_lock, flags);
>
> + if (rc == -EAGAIN)
> + goto restart;
> +
> return rc;
> }
>
> @@ -847,6 +887,27 @@ int qi_flush_iotlb(struct intel_iommu *iommu, u16 did, u64 addr,
> return qi_submit_sync(&desc, iommu);
> }
>
> +int qi_flush_dev_iotlb(struct intel_iommu *iommu, u16 sid, u16 qdep,
> + u64 addr, unsigned mask)
> +{
> + struct qi_desc desc;
> +
> + if (mask) {
> + BUG_ON(addr & ((1 << (VTD_PAGE_SHIFT + mask)) - 1));
> + addr |= (1 << (VTD_PAGE_SHIFT + mask - 1)) - 1;
> + desc.high = QI_DEV_IOTLB_ADDR(addr) | QI_DEV_IOTLB_SIZE;
> + } else
> + desc.high = QI_DEV_IOTLB_ADDR(addr);
> +
> + if (qdep >= QI_DEV_IOTLB_MAX_INVS)
> + qdep = 0;
> +
> + desc.low = QI_DEV_IOTLB_SID(sid) | QI_DEV_IOTLB_QDEP(qdep) |
> + QI_DIOTLB_TYPE;
> +
> + return qi_submit_sync(&desc, iommu);
> +}
> +
> /*
> * Enable Queued Invalidation interface. This is a must to support
> * interrupt-remapping. Also used by DMA-remapping, which replaces
> diff --git a/include/linux/intel-iommu.h b/include/linux/intel-iommu.h
> index 660a7f4..a32b3db 100644
> --- a/include/linux/intel-iommu.h
> +++ b/include/linux/intel-iommu.h
> @@ -53,6 +53,7 @@
> #define DMAR_PHMLIMIT_REG 0x78 /* pmrr high limit */
> #define DMAR_IQH_REG 0x80 /* Invalidation queue head register */
> #define DMAR_IQT_REG 0x88 /* Invalidation queue tail register */
> +#define DMAR_IQ_OFFSET 4 /* Invalidation queue head/tail offset */
> #define DMAR_IQA_REG 0x90 /* Invalidation queue addr register */
> #define DMAR_ICS_REG 0x98 /* Invalidation complete status register */
> #define DMAR_IRTA_REG 0xb8 /* Interrupt remapping table addr register */
> @@ -195,6 +196,8 @@ static inline void dmar_writeq(void __iomem *addr, u64 val)
> #define DMA_FSTS_PPF ((u32)2)
> #define DMA_FSTS_PFO ((u32)1)
> #define DMA_FSTS_IQE (1 << 4)
> +#define DMA_FSTS_ICE (1 << 5)
> +#define DMA_FSTS_ITE (1 << 6)
> #define dma_fsts_fault_record_index(s) (((s) >> 8) & 0xff)
>
> /* FRCD_REG, 32 bits access */
> @@ -223,7 +226,8 @@ do { \
> enum {
> QI_FREE,
> QI_IN_USE,
> - QI_DONE
> + QI_DONE,
> + QI_ABORT
> };
>
> #define QI_CC_TYPE 0x1
> @@ -252,6 +256,12 @@ enum {
> #define QI_CC_DID(did) (((u64)did) << 16)
> #define QI_CC_GRAN(gran) (((u64)gran) >> (DMA_CCMD_INVL_GRANU_OFFSET-4))
>
> +#define QI_DEV_IOTLB_SID(sid) ((u64)((sid) & 0xffff) << 32)
> +#define QI_DEV_IOTLB_QDEP(qdep) (((qdep) & 0x1f) << 16)
> +#define QI_DEV_IOTLB_ADDR(addr) ((u64)(addr) & VTD_PAGE_MASK)
> +#define QI_DEV_IOTLB_SIZE 1
> +#define QI_DEV_IOTLB_MAX_INVS 32
> +
> struct qi_desc {
> u64 low, high;
> };
> @@ -329,6 +339,8 @@ extern int qi_flush_context(struct intel_iommu *iommu, u16 did, u16 sid,
> extern int qi_flush_iotlb(struct intel_iommu *iommu, u16 did, u64 addr,
> unsigned int size_order, u64 type,
> int non_present_entry_flush);
> +extern int qi_flush_dev_iotlb(struct intel_iommu *iommu, u16 sid, u16 qdep,
> + u64 addr, unsigned mask);
>
> extern int qi_submit_sync(struct qi_desc *desc, struct intel_iommu *iommu);
>
> --
> 1.5.6.4
>
> --
> To unsubscribe from this list: send the line "unsubscribe linux-pci" in
> the body of a message to majordomo@xxxxxxxxxxxxxxx
> More majordomo info at http://vger.kernel.org/majordomo-info.html
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/