From 0d9a0c2d3573d9100787fe88bc0bf82a72fe77df Mon Sep 17 00:00:00 2001 From: Sudarshan Rajagopalan Date: Wed, 7 Feb 2018 16:46:59 -0800 Subject: [PATCH] iommu: io-pgtable-arm: Implement IOMMU_USE_LLC_NWA With the GPU moving onto a new SMMU, the override was removed due to the way MMU-500 was integrated. Hence, the method of hinting the SMMU of the upstream device's bus attributes would no longer work for GPU. Instead, encode the MAIR and TCR register through the IOMMU_USE_LLC_NWA attribute flag to map the memory into System Cache with no Write-Allocate. MAIR Encoding: Bits[7:4] => 0b1110 = Outer Write-back read-allocate, no write-allocate Bits[3:0] => 0b0100 = Inner non-cacheable normal memory TCR Encoding: SH => 0b10 = Outer Shareable ORGN => 0b11 = Write-Back, no Write-Allocate cacheable Change-Id: I34db1ebfb5f4e080ca01328176bcabc368e9ddab Signed-off-by: Sudarshan Rajagopalan --- Documentation/DMA-attributes.txt | 7 +++++++ arch/arm64/mm/dma-mapping.c | 2 ++ drivers/iommu/arm-smmu.c | 21 ++++++++++++++++++++- drivers/iommu/dma-iommu.c | 3 +++ drivers/iommu/io-pgtable-arm.c | 24 ++++++++++++++++++++++-- drivers/iommu/io-pgtable.h | 5 +++++ include/linux/dma-mapping.h | 7 +++++++ include/linux/iommu.h | 4 ++++ 8 files changed, 70 insertions(+), 3 deletions(-) diff --git a/Documentation/DMA-attributes.txt b/Documentation/DMA-attributes.txt index 8f8d97f65d73..7972942f8c19 100644 --- a/Documentation/DMA-attributes.txt +++ b/Documentation/DMA-attributes.txt @@ -156,3 +156,10 @@ accesses to DMA buffers in both privileged "supervisor" and unprivileged subsystem that the buffer is fully accessible at the elevated privilege level (and ideally inaccessible or at least read-only at the lesser-privileged levels). 
+ +DMA_ATTR_IOMMU_USE_LLC_NWA +------------------------------------ + +DMA_ATTR_IOMMU_USE_LLC_NWA: Overrides the bus attributes to use the System +Cache (LLC), with memory attributes Inner Non-Cacheable, Outer Cacheable: +Write-Back, Read-Allocate, No Write-Allocate. \ No newline at end of file diff --git a/arch/arm64/mm/dma-mapping.c b/arch/arm64/mm/dma-mapping.c index 87fe156574f6..3a9b5e83c9d4 100644 --- a/arch/arm64/mm/dma-mapping.c +++ b/arch/arm64/mm/dma-mapping.c @@ -1092,6 +1092,8 @@ static int __get_iommu_pgprot(unsigned long attrs, int prot, prot |= IOMMU_NOEXEC; if (attrs & DMA_ATTR_IOMMU_USE_UPSTREAM_HINT) prot |= IOMMU_USE_UPSTREAM_HINT; + if (attrs & DMA_ATTR_IOMMU_USE_LLC_NWA) + prot |= IOMMU_USE_LLC_NWA; if (coherent) prot |= IOMMU_CACHE; diff --git a/drivers/iommu/arm-smmu.c b/drivers/iommu/arm-smmu.c index 6fb07c709f26..1eaede6e1b59 100644 --- a/drivers/iommu/arm-smmu.c +++ b/drivers/iommu/arm-smmu.c @@ -1679,7 +1679,10 @@ static int arm_smmu_init_domain_context(struct iommu_domain *domain, quirks |= IO_PGTABLE_QUIRK_QCOM_USE_UPSTREAM_HINT; if (is_iommu_pt_coherent(smmu_domain)) quirks |= IO_PGTABLE_QUIRK_NO_DMA; - if ((quirks & IO_PGTABLE_QUIRK_QCOM_USE_UPSTREAM_HINT) && + if (smmu_domain->attributes & (1 << DOMAIN_ATTR_USE_LLC_NWA)) + quirks |= IO_PGTABLE_QUIRK_QCOM_USE_LLC_NWA; + if (((quirks & IO_PGTABLE_QUIRK_QCOM_USE_UPSTREAM_HINT) || + (quirks & IO_PGTABLE_QUIRK_QCOM_USE_LLC_NWA)) && (smmu->model == QCOM_SMMUV500)) quirks |= IO_PGTABLE_QUIRK_QSMMUV500_NON_SHAREABLE; @@ -2819,6 +2822,11 @@ static int arm_smmu_domain_get_attr(struct iommu_domain *domain, (1 << DOMAIN_ATTR_USE_UPSTREAM_HINT)); ret = 0; break; + case DOMAIN_ATTR_USE_LLC_NWA: + *((int *)data) = !!(smmu_domain->attributes & + (1 << DOMAIN_ATTR_USE_LLC_NWA)); + ret = 0; + break; case DOMAIN_ATTR_EARLY_MAP: *((int *)data) = !!(smmu_domain->attributes & (1 << DOMAIN_ATTR_EARLY_MAP)); @@ -3001,6 +3009,17 @@ static int arm_smmu_domain_set_attr(struct iommu_domain *domain, 1 <<
DOMAIN_ATTR_USE_UPSTREAM_HINT; ret = 0; break; + case DOMAIN_ATTR_USE_LLC_NWA: + /* can't be changed while attached */ + if (smmu_domain->smmu != NULL) { + ret = -EBUSY; + break; + } + if (*((int *)data)) + smmu_domain->attributes |= + 1 << DOMAIN_ATTR_USE_LLC_NWA; + ret = 0; + break; case DOMAIN_ATTR_EARLY_MAP: { int early_map = *((int *)data); diff --git a/drivers/iommu/dma-iommu.c b/drivers/iommu/dma-iommu.c index bb5c7c711f32..7ea5788f8484 100644 --- a/drivers/iommu/dma-iommu.c +++ b/drivers/iommu/dma-iommu.c @@ -381,6 +381,9 @@ int dma_info_to_prot(enum dma_data_direction dir, bool coherent, if (attrs & DMA_ATTR_IOMMU_USE_UPSTREAM_HINT) prot |= IOMMU_USE_UPSTREAM_HINT; + if (attrs & DMA_ATTR_IOMMU_USE_LLC_NWA) + prot |= IOMMU_USE_LLC_NWA; + switch (dir) { case DMA_BIDIRECTIONAL: return prot | IOMMU_READ | IOMMU_WRITE; diff --git a/drivers/iommu/io-pgtable-arm.c b/drivers/iommu/io-pgtable-arm.c index c95cddf5c06d..c26b8e9604aa 100644 --- a/drivers/iommu/io-pgtable-arm.c +++ b/drivers/iommu/io-pgtable-arm.c @@ -169,15 +169,18 @@ #define ARM_LPAE_TCR_PS_48_BIT 0x5ULL #define ARM_LPAE_MAIR_ATTR_SHIFT(n) ((n) << 3) +#define ARM_LPAE_MAIR1_ATTR_SHIFT(n) ((n-4) << 3) #define ARM_LPAE_MAIR_ATTR_MASK 0xff #define ARM_LPAE_MAIR_ATTR_DEVICE 0x04 #define ARM_LPAE_MAIR_ATTR_NC 0x44 #define ARM_LPAE_MAIR_ATTR_WBRWA 0xff #define ARM_LPAE_MAIR_ATTR_UPSTREAM 0xf4 +#define ARM_LPAE_MAIR_ATTR_LLC_NWA 0xe4 #define ARM_LPAE_MAIR_ATTR_IDX_NC 0 #define ARM_LPAE_MAIR_ATTR_IDX_CACHE 1 #define ARM_LPAE_MAIR_ATTR_IDX_DEV 2 #define ARM_LPAE_MAIR_ATTR_IDX_UPSTREAM 3 +#define ARM_LPAE_MAIR_ATTR_IDX_LLC_NWA 0x4ULL /* IOPTE accessors */ #define iopte_deref(pte, d) \ @@ -583,6 +586,9 @@ static arm_lpae_iopte arm_lpae_prot_to_pte(struct arm_lpae_io_pgtable *data, else if (prot & IOMMU_USE_UPSTREAM_HINT) pte |= (ARM_LPAE_MAIR_ATTR_IDX_UPSTREAM << ARM_LPAE_PTE_ATTRINDX_SHIFT); + else if (prot & IOMMU_USE_LLC_NWA) + pte |= (ARM_LPAE_MAIR_ATTR_IDX_LLC_NWA + << ARM_LPAE_PTE_ATTRINDX_SHIFT); } else 
{ pte = ARM_LPAE_PTE_HAP_FAULT; if (prot & IOMMU_READ) @@ -1117,7 +1123,8 @@ arm_64_lpae_alloc_pgtable_s1(struct io_pgtable_cfg *cfg, void *cookie) if (cfg->quirks & ~(IO_PGTABLE_QUIRK_ARM_NS | IO_PGTABLE_QUIRK_NO_DMA | IO_PGTABLE_QUIRK_QCOM_USE_UPSTREAM_HINT - | IO_PGTABLE_QUIRK_QSMMUV500_NON_SHAREABLE)) + | IO_PGTABLE_QUIRK_QSMMUV500_NON_SHAREABLE + | IO_PGTABLE_QUIRK_QCOM_USE_LLC_NWA)) return NULL; data = arm_lpae_alloc_pgtable(cfg); @@ -1138,6 +1145,15 @@ arm_64_lpae_alloc_pgtable_s1(struct io_pgtable_cfg *cfg, void *cookie) reg = (ARM_LPAE_TCR_SH_OS << ARM_LPAE_TCR_SH0_SHIFT) | (ARM_LPAE_TCR_RGN_NC << ARM_LPAE_TCR_IRGN0_SHIFT) | (ARM_LPAE_TCR_RGN_WBWA << ARM_LPAE_TCR_ORGN0_SHIFT); + else if ((cfg->quirks & IO_PGTABLE_QUIRK_QCOM_USE_LLC_NWA) && + (cfg->quirks & IO_PGTABLE_QUIRK_QSMMUV500_NON_SHAREABLE)) + reg = (ARM_LPAE_TCR_SH_NS << ARM_LPAE_TCR_SH0_SHIFT) | + (ARM_LPAE_TCR_RGN_NC << ARM_LPAE_TCR_IRGN0_SHIFT) | + (ARM_LPAE_TCR_RGN_WB << ARM_LPAE_TCR_ORGN0_SHIFT); + else if (cfg->quirks & IO_PGTABLE_QUIRK_QCOM_USE_LLC_NWA) + reg = (ARM_LPAE_TCR_SH_OS << ARM_LPAE_TCR_SH0_SHIFT) | + (ARM_LPAE_TCR_RGN_NC << ARM_LPAE_TCR_IRGN0_SHIFT) | + (ARM_LPAE_TCR_RGN_WB << ARM_LPAE_TCR_ORGN0_SHIFT); else reg = (ARM_LPAE_TCR_SH_OS << ARM_LPAE_TCR_SH0_SHIFT) | (ARM_LPAE_TCR_RGN_NC << ARM_LPAE_TCR_IRGN0_SHIFT) | @@ -1195,7 +1211,11 @@ arm_64_lpae_alloc_pgtable_s1(struct io_pgtable_cfg *cfg, void *cookie) << ARM_LPAE_MAIR_ATTR_SHIFT(ARM_LPAE_MAIR_ATTR_IDX_UPSTREAM)); cfg->arm_lpae_s1_cfg.mair[0] = reg; - cfg->arm_lpae_s1_cfg.mair[1] = 0; + + reg = ARM_LPAE_MAIR_ATTR_LLC_NWA + << ARM_LPAE_MAIR1_ATTR_SHIFT(ARM_LPAE_MAIR_ATTR_IDX_LLC_NWA); + + cfg->arm_lpae_s1_cfg.mair[1] = reg; /* Looking good; allocate a pgd */ data->pgd = __arm_lpae_alloc_pages(data->pgd_size, GFP_KERNEL, diff --git a/drivers/iommu/io-pgtable.h b/drivers/iommu/io-pgtable.h index d53c22339001..2b6c758c3c38 100644 --- a/drivers/iommu/io-pgtable.h +++ b/drivers/iommu/io-pgtable.h @@ -93,6 +93,10 @@ struct 
io_pgtable_cfg { * set in TCR for the page table walker. Use attributes specified * by the upstream hw instead. * + * IO_PGTABLE_QUIRK_QCOM_USE_LLC_NWA: Override the attributes + * set in TCR for the page table walker with Write-Back, + * no Write-Allocate cacheable encoding. + * */ #define IO_PGTABLE_QUIRK_ARM_NS BIT(0) #define IO_PGTABLE_QUIRK_NO_PERMS BIT(1) @@ -101,6 +105,7 @@ struct io_pgtable_cfg { #define IO_PGTABLE_QUIRK_NO_DMA BIT(4) #define IO_PGTABLE_QUIRK_QSMMUV500_NON_SHAREABLE BIT(5) #define IO_PGTABLE_QUIRK_QCOM_USE_UPSTREAM_HINT BIT(6) + #define IO_PGTABLE_QUIRK_QCOM_USE_LLC_NWA BIT(7) unsigned long quirks; unsigned long pgsize_bitmap; unsigned int ias; diff --git a/include/linux/dma-mapping.h b/include/linux/dma-mapping.h index f30972e4e8ca..911b7248d6f4 100644 --- a/include/linux/dma-mapping.h +++ b/include/linux/dma-mapping.h @@ -112,6 +112,13 @@ */ #define DMA_ATTR_PRIVILEGED (1UL << 17) +/* + * DMA_ATTR_IOMMU_USE_LLC_NWA: Overrides the bus attributes to use the System + * Cache (LLC), with memory attributes Inner Non-Cacheable, Outer Cacheable: + * Write-Back, Read-Allocate, No Write-Allocate. + */ +#define DMA_ATTR_IOMMU_USE_LLC_NWA (1UL << 18) + #define DMA_ERROR_CODE (~(dma_addr_t)0) /* diff --git a/include/linux/iommu.h b/include/linux/iommu.h index 9ce5316bdee6..7ea1d94ed675 100644 --- a/include/linux/iommu.h +++ b/include/linux/iommu.h @@ -46,6 +46,9 @@ /* Use upstream device's bus attribute */ #define IOMMU_USE_UPSTREAM_HINT (1 << 6) +/* Override bus attributes: map into System Cache (LLC), no write-allocate */ +#define IOMMU_USE_LLC_NWA (1 << 7) + struct iommu_ops; struct iommu_group; struct bus_type; @@ -155,6 +158,7 @@ enum iommu_attr { DOMAIN_ATTR_CB_STALL_DISABLE, DOMAIN_ATTR_BITMAP_IOVA_ALLOCATOR, DOMAIN_ATTR_QCOM_MMU500_ERRATA_MIN_IOVA_ALIGN, + DOMAIN_ATTR_USE_LLC_NWA, DOMAIN_ATTR_MAX, };