Re: [GIT PULL] x86/mm for 6.4

From: Dave Hansen
Date: Thu May 04 2023 - 11:26:30 EST


On 5/3/23 23:28, Kirill A. Shutemov wrote:
>> Untagging a kernel address will "corrupt" it, but it will stay a
>> kernel address (well, it will stay a "high bit set" address), which is
>> all we care about anyway.
> The interesting case to consider is untagging kernel pointer when LAM_U48
> is enabled (not part of current LAM enabling). LAM_U48 would make the
> untagging mask wider -- ~GENMASK(62, 48). With 5-level paging and LAM_SUP
> enabled (also not supported yet) untagging kernel may transform it to
> other valid kernel pointer.
>
> So we cannot rely on #GP as backstop here. The kernel has to exclude
> kernel pointer by other means. It can be fun to debug.

Yeah, I have the feeling that we're really going to need a pair of
untagging functions once we get to doing kernel LAM for a _bunch_ of
reasons.

Just as a practical matter, I think we should OR bits into the mask on
the kernel side, effectively:

unsigned long untag_kernel_addr(unsigned long addr)
{
return addr | kernel_untag_mask;
}

and kernel_untag_mask should have bit 63 *clear*.

That way the pointers that have gone through untagging won't look silly.
If you untag VMALLOC_END or something, it'll still look like the
addresses we have in mm.rst.

Also, it'll be impossible to have the mask turn a userspace address into
a kernel one.

Last, we can add some debugging in there, probably conditional on some
mm debugging options like:

if (WARN_ON_ONCE(!valid_user_address(addr)))
return 0;

It's kinda like "void __user *" versus "void *". The __user ones can
*absolutely* point anywhere, user or kernel. That's why we can't WARN()
in the untagged_addr() function that takes user pointers.

But "void *" can only point to the kernel. It has totally different rules.

We should probably also do something like the attached patch sooner
rather than later. 'untag_mask' really is a misleading name for a mask
that gets applied only to user addresses.diff --git a/arch/x86/include/asm/mmu.h b/arch/x86/include/asm/mmu.h
index 0da5c227f490..adee70ee398c 100644
--- a/arch/x86/include/asm/mmu.h
+++ b/arch/x86/include/asm/mmu.h
@@ -51,7 +51,7 @@ typedef struct {
unsigned long lam_cr3_mask;

/* Significant bits of the virtual address. Excludes tag bits. */
- u64 untag_mask;
+ u64 user_untag_mask;
#endif

struct mutex lock;
diff --git a/arch/x86/include/asm/mmu_context.h b/arch/x86/include/asm/mmu_context.h
index 1d29dc791f5a..b4bb4b1a36e4 100644
--- a/arch/x86/include/asm/mmu_context.h
+++ b/arch/x86/include/asm/mmu_context.h
@@ -94,18 +94,18 @@ static inline unsigned long mm_lam_cr3_mask(struct mm_struct *mm)
static inline void dup_lam(struct mm_struct *oldmm, struct mm_struct *mm)
{
mm->context.lam_cr3_mask = oldmm->context.lam_cr3_mask;
- mm->context.untag_mask = oldmm->context.untag_mask;
+ mm->context.user_untag_mask = oldmm->context.user_untag_mask;
}

#define mm_untag_mask mm_untag_mask
static inline unsigned long mm_untag_mask(struct mm_struct *mm)
{
- return mm->context.untag_mask;
+ return mm->context.user_untag_mask;
}

static inline void mm_reset_untag_mask(struct mm_struct *mm)
{
- mm->context.untag_mask = -1UL;
+ mm->context.user_untag_mask = -1UL;
}

#define arch_pgtable_dma_compat arch_pgtable_dma_compat
diff --git a/arch/x86/include/asm/tlbflush.h b/arch/x86/include/asm/tlbflush.h
index 75bfaa421030..9fc6a2ac5318 100644
--- a/arch/x86/include/asm/tlbflush.h
+++ b/arch/x86/include/asm/tlbflush.h
@@ -55,11 +55,11 @@ static inline void cr4_clear_bits(unsigned long mask)
}

#ifdef CONFIG_ADDRESS_MASKING
-DECLARE_PER_CPU(u64, tlbstate_untag_mask);
+DECLARE_PER_CPU(u64, tlbstate_user_untag_mask);

-static inline u64 current_untag_mask(void)
+static inline u64 current_user_untag_mask(void)
{
- return this_cpu_read(tlbstate_untag_mask);
+ return this_cpu_read(tlbstate_user_untag_mask);
}
#endif

@@ -389,7 +389,7 @@ static inline void set_tlbstate_lam_mode(struct mm_struct *mm)
{
this_cpu_write(cpu_tlbstate.lam,
mm->context.lam_cr3_mask >> X86_CR3_LAM_U57_BIT);
- this_cpu_write(tlbstate_untag_mask, mm->context.untag_mask);
+ this_cpu_write(tlbstate_user_untag_mask, mm->context.user_untag_mask);
}

#else
diff --git a/arch/x86/include/asm/uaccess.h b/arch/x86/include/asm/uaccess.h
index 123135d60f72..044d82e98b27 100644
--- a/arch/x86/include/asm/uaccess.h
+++ b/arch/x86/include/asm/uaccess.h
@@ -36,16 +36,16 @@ static inline unsigned long __untagged_addr(unsigned long addr)
long sign;

/*
- * Refer tlbstate_untag_mask directly to avoid RIP-relative relocation
- * in alternative instructions. The relocation gets wrong when gets
- * copied to the target place.
+ * Refer tlbstate_user_untag_mask directly to avoid RIP-relative
+ * relocation in alternative instructions. The relocation gets wrong when
+ * gets copied to the target place.
*/
asm (ALTERNATIVE("",
"sar $63, %[sign]\n\t" /* user_ptr ? 0 : -1UL */
- "or %%gs:tlbstate_untag_mask, %[sign]\n\t"
+ "or %%gs:tlbstate_user_untag_mask, %[sign]\n\t"
"and %[sign], %[addr]\n\t", X86_FEATURE_LAM)
: [addr] "+r" (addr), [sign] "=r" (sign)
- : "m" (tlbstate_untag_mask), "[sign]" (addr));
+ : "m" (tlbstate_user_untag_mask), "[sign]" (addr));

return addr;
}
@@ -55,20 +55,20 @@ static inline unsigned long __untagged_addr(unsigned long addr)
(__force __typeof__(addr))__untagged_addr(__addr); \
})

-static inline unsigned long __untagged_addr_remote(struct mm_struct *mm,
- unsigned long addr)
+static inline unsigned long __untag_remote_user_addr(struct mm_struct *mm,
+ unsigned long addr)
{
long sign = addr >> 63;

mmap_assert_locked(mm);
- addr &= (mm)->context.untag_mask | sign;
+ addr &= (mm)->context.user_untag_mask | sign;

return addr;
}

-#define untagged_addr_remote(mm, addr) ({ \
+#define untag_remote_user_addr(mm, addr) ({ \
unsigned long __addr = (__force unsigned long)(addr); \
- (__force __typeof__(addr))__untagged_addr_remote(mm, __addr); \
+ (__force __typeof__(addr))__untag_remote_user_addr(mm, __addr); \
})

#else
diff --git a/drivers/vfio/vfio_iommu_type1.c b/drivers/vfio/vfio_iommu_type1.c
index 3d4dd9420c30..929152b28b8c 100644
--- a/drivers/vfio/vfio_iommu_type1.c
+++ b/drivers/vfio/vfio_iommu_type1.c
@@ -580,7 +580,7 @@ static int vaddr_get_pfns(struct mm_struct *mm, unsigned long vaddr,
goto done;
}

- vaddr = untagged_addr_remote(mm, vaddr);
+ vaddr = untag_remote_user_addr(mm, vaddr);

retry:
vma = vma_lookup(mm, vaddr);
diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c
index 420510f6a545..7ccd3f341af4 100644
--- a/fs/proc/task_mmu.c
+++ b/fs/proc/task_mmu.c
@@ -1692,7 +1692,7 @@ static ssize_t pagemap_read(struct file *file, char __user *buf,
ret = mmap_read_lock_killable(mm);
if (ret)
goto out_free;
- start_vaddr = untagged_addr_remote(mm, svpfn << PAGE_SHIFT);
+ start_vaddr = untag_remote_user_addr(mm, svpfn << PAGE_SHIFT);
mmap_read_unlock(mm);
}

diff --git a/include/linux/uaccess.h b/include/linux/uaccess.h
index 3064314f4832..8c956172252f 100644
--- a/include/linux/uaccess.h
+++ b/include/linux/uaccess.h
@@ -25,8 +25,8 @@
#define untagged_addr(addr) (addr)
#endif

-#ifndef untagged_addr_remote
-#define untagged_addr_remote(mm, addr) ({ \
+#ifndef untag_remote_user_addr
+#define untag_remote_user_addr(mm, addr) ({ \
mmap_assert_locked(mm); \
untagged_addr(addr); \
})
diff --git a/mm/gup.c b/mm/gup.c
index bbe416236593..d22bad9147d7 100644
--- a/mm/gup.c
+++ b/mm/gup.c
@@ -1085,7 +1085,7 @@ static long __get_user_pages(struct mm_struct *mm,
if (!nr_pages)
return 0;

- start = untagged_addr_remote(mm, start);
+ start = untag_remote_user_addr(mm, start);

VM_BUG_ON(!!pages != !!(gup_flags & (FOLL_GET | FOLL_PIN)));

@@ -1259,7 +1259,7 @@ int fixup_user_fault(struct mm_struct *mm,
struct vm_area_struct *vma;
vm_fault_t ret;

- address = untagged_addr_remote(mm, address);
+ address = untag_remote_user_addr(mm, address);

if (unlocked)
fault_flags |= FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE;
diff --git a/mm/madvise.c b/mm/madvise.c
index b5ffbaf616f5..ffb5d76a9481 100644
--- a/mm/madvise.c
+++ b/mm/madvise.c
@@ -1421,7 +1421,7 @@ int do_madvise(struct mm_struct *mm, unsigned long start, size_t len_in, int beh
mmap_read_lock(mm);
}

- start = untagged_addr_remote(mm, start);
+ start = untag_remote_user_addr(mm, start);
end = start + len;

blk_start_plug(&plug);
diff --git a/mm/migrate.c b/mm/migrate.c
index 01cac26a3127..7647c0a6ff30 100644
--- a/mm/migrate.c
+++ b/mm/migrate.c
@@ -2109,7 +2109,7 @@ static int add_page_for_migration(struct mm_struct *mm, const void __user *p,
bool isolated;

mmap_read_lock(mm);
- addr = (unsigned long)untagged_addr_remote(mm, p);
+ addr = (unsigned long)untag_remote_user_addr(mm, p);

err = -EFAULT;
vma = vma_lookup(mm, addr);