Re: [PATCH v3 1/3] x86/ldt: Make modify_ldt synchronous

From: Borislav Petkov
Date: Fri Jul 24 2015 - 11:30:12 EST


On Wed, Jul 22, 2015 at 12:23:46PM -0700, Andy Lutomirski wrote:
> modify_ldt has questionable locking and does not synchronize
> threads. Improve it: redesign the locking and synchronize all
> threads' LDTs using an IPI on all modifications.
>
> This will dramatically slow down modify_ldt in multithreaded
> programs, but there shouldn't be any multithreaded programs that
> care about modify_ldt's performance in the first place.
>
> Cc: stable@xxxxxxxxxxxxxxx
> Signed-off-by: Andy Lutomirski <luto@xxxxxxxxxx>

Just minor stylistic nitpicks below. Otherwise looks ok to me.

...

> diff --git a/arch/x86/kernel/ldt.c b/arch/x86/kernel/ldt.c
> index c37886d759cc..3ae308029dee 100644
> --- a/arch/x86/kernel/ldt.c
> +++ b/arch/x86/kernel/ldt.c
> @@ -12,6 +12,7 @@
> #include <linux/string.h>
> #include <linux/mm.h>
> #include <linux/smp.h>
> +#include <linux/slab.h>
> #include <linux/vmalloc.h>
> #include <linux/uaccess.h>
>
> @@ -20,82 +21,83 @@
> #include <asm/mmu_context.h>
> #include <asm/syscalls.h>
>
> -#ifdef CONFIG_SMP
> static void flush_ldt(void *current_mm)
> {
> - if (current->active_mm == current_mm)
> - load_LDT(&current->active_mm->context);
> + if (current->active_mm == current_mm) {

Save indentation level:

if (current->active_mm != current_mm)
return;

> + /* context.lock is held for us, so we don't need any locking. */

Stick that comment above the function name.

> + mm_context_t *pc = &current->active_mm->context;
> + set_ldt(pc->ldt->entries, pc->ldt->size);
> + }
> }
> -#endif
>
> -static int alloc_ldt(mm_context_t *pc, int mincount, int reload)
> +/* The caller must call finalize_ldt_struct on the result. */
> +static struct ldt_struct *alloc_ldt_struct(int size)
> {
> - void *oldldt, *newldt;
> - int oldsize;
> + struct ldt_struct *new_ldt;
> + int alloc_size;
>
> - if (mincount <= pc->size)
> - return 0;
> - oldsize = pc->size;
> - mincount = (mincount + (PAGE_SIZE / LDT_ENTRY_SIZE - 1)) &
> - (~(PAGE_SIZE / LDT_ENTRY_SIZE - 1));
> - if (mincount * LDT_ENTRY_SIZE > PAGE_SIZE)
> - newldt = vmalloc(mincount * LDT_ENTRY_SIZE);
> + if (size > LDT_ENTRIES)
> + return NULL;
> +
> + new_ldt = kmalloc(sizeof(struct ldt_struct), GFP_KERNEL);
> + if (!new_ldt)
> + return NULL;
> +
> + BUILD_BUG_ON(LDT_ENTRY_SIZE != sizeof(struct desc_struct));
> + alloc_size = size * LDT_ENTRY_SIZE;
> +
> + if (alloc_size > PAGE_SIZE)
> + new_ldt->entries = vmalloc(alloc_size);
> else
> - newldt = (void *)__get_free_page(GFP_KERNEL);
> + new_ldt->entries = (void *)__get_free_page(GFP_KERNEL);

newline here.

> + if (!new_ldt->entries) {
> + kfree(new_ldt);
> + return NULL;
> + }
>
> - if (!newldt)
> - return -ENOMEM;
> + new_ldt->size = size;
> + return new_ldt;
> +}
> +
> +/* After calling this, the LDT is immutable. */
> +static void finalize_ldt_struct(struct ldt_struct *ldt)
> +{
> + paravirt_alloc_ldt(ldt->entries, ldt->size);
> +}
>
> - if (oldsize)
> - memcpy(newldt, pc->ldt, oldsize * LDT_ENTRY_SIZE);
> - oldldt = pc->ldt;
> - memset(newldt + oldsize * LDT_ENTRY_SIZE, 0,
> - (mincount - oldsize) * LDT_ENTRY_SIZE);
> +static void install_ldt(struct mm_struct *current_mm,
> + struct ldt_struct *ldt)
> +{
> + /* context.lock is held */
> + preempt_disable();
>
> - paravirt_alloc_ldt(newldt, mincount);
> + /* Synchronizes with lockless_dereference in load_mm_ldt. */

Good.

> + smp_store_release(&current_mm->context.ldt, ldt);
>
> -#ifdef CONFIG_X86_64
> - /* CHECKME: Do we really need this ? */
> - wmb();
> -#endif
> - pc->ldt = newldt;
> - wmb();
> - pc->size = mincount;
> - wmb();
> + /* Activate for this CPU. */
> + flush_ldt(current->mm);
>
> - if (reload) {
> #ifdef CONFIG_SMP
> - preempt_disable();
> - load_LDT(pc);
> - if (!cpumask_equal(mm_cpumask(current->mm),
> - cpumask_of(smp_processor_id())))
> - smp_call_function(flush_ldt, current->mm, 1);
> - preempt_enable();
> -#else
> - load_LDT(pc);
> + /* Synchronize with other CPUs. */
> + if (!cpumask_equal(mm_cpumask(current_mm),
> + cpumask_of(smp_processor_id())))

Let it stick out:

if (!cpumask_equal(mm_cpumask(current_mm), cpumask_of(smp_processor_id())))
smp_call_function(flush_ldt, current_mm, 1);
> #endif
> - }
> - if (oldsize) {
> - paravirt_free_ldt(oldldt, oldsize);
> - if (oldsize * LDT_ENTRY_SIZE > PAGE_SIZE)
> - vfree(oldldt);
> - else
> - put_page(virt_to_page(oldldt));
> - }
> - return 0;
> + preempt_enable();
> }
>
> -static inline int copy_ldt(mm_context_t *new, mm_context_t *old)
> +static void free_ldt_struct(struct ldt_struct *ldt)
> {
> - int err = alloc_ldt(new, old->size, 0);
> - int i;
> -
> - if (err < 0)
> - return err;
> -
> - for (i = 0; i < old->size; i++)
> - write_ldt_entry(new->ldt, i, old->ldt + i * LDT_ENTRY_SIZE);
> - return 0;
> + if (unlikely(ldt)) {

Save an indentation level:

int alloc_size;

if (!ldt)
return;

alloc_size = sizeof(struct ldt_struct) + ldt->size * LDT_ENTRY_SIZE;

...

> + paravirt_free_ldt(ldt->entries, ldt->size);
> + if (alloc_size > PAGE_SIZE)
> + vfree(ldt->entries);
> + else
> + put_page(virt_to_page(ldt->entries));
> + kfree(ldt);
> + }
> }
>
> /*
> @@ -108,13 +110,32 @@ int init_new_context(struct task_struct *tsk, struct mm_struct *mm)
> int retval = 0;
>
> mutex_init(&mm->context.lock);
> - mm->context.size = 0;
> old_mm = current->mm;
> - if (old_mm && old_mm->context.size > 0) {
> - mutex_lock(&old_mm->context.lock);
> - retval = copy_ldt(&mm->context, &old_mm->context);
> - mutex_unlock(&old_mm->context.lock);
> + if (!old_mm) {
> + mm->context.ldt = NULL;
> + return 0;
> + }
> +
> + mutex_lock(&old_mm->context.lock);
> + if (old_mm->context.ldt) {

Same here:

if (!old_mm->context.ldt) {
mm->context.ldt = NULL;
goto out_unlock;
}

new_ldt = ...

> + struct ldt_struct *new_ldt =
> + alloc_ldt_struct(old_mm->context.ldt->size);
> + if (!new_ldt) {
> + retval = -ENOMEM;
> + goto out_unlock;
> + }
> +
> + memcpy(new_ldt->entries, old_mm->context.ldt->entries,
> + new_ldt->size * LDT_ENTRY_SIZE);
> + finalize_ldt_struct(new_ldt);
> +
> + mm->context.ldt = new_ldt;
> + } else {
> + mm->context.ldt = NULL;
> }
> +
> +out_unlock:
> + mutex_unlock(&old_mm->context.lock);
> return retval;
> }
>
> @@ -125,53 +146,47 @@ int init_new_context(struct task_struct *tsk, struct mm_struct *mm)
> */
> void destroy_context(struct mm_struct *mm)
> {
> - if (mm->context.size) {
> -#ifdef CONFIG_X86_32
> - /* CHECKME: Can this ever happen ? */
> - if (mm == current->active_mm)
> - clear_LDT();
> -#endif
> - paravirt_free_ldt(mm->context.ldt, mm->context.size);
> - if (mm->context.size * LDT_ENTRY_SIZE > PAGE_SIZE)
> - vfree(mm->context.ldt);
> - else
> - put_page(virt_to_page(mm->context.ldt));
> - mm->context.size = 0;
> - }
> + free_ldt_struct(mm->context.ldt);
> + mm->context.ldt = NULL;
> }
>
> static int read_ldt(void __user *ptr, unsigned long bytecount)
> {
> - int err;
> + int retval;
> unsigned long size;
> struct mm_struct *mm = current->mm;
>
> - if (!mm->context.size)
> - return 0;
> + mutex_lock(&mm->context.lock);
> +
> + if (!mm->context.ldt) {
> + retval = 0;
> + goto out_unlock;
> + }
> +
> if (bytecount > LDT_ENTRY_SIZE * LDT_ENTRIES)
> bytecount = LDT_ENTRY_SIZE * LDT_ENTRIES;
>
> - mutex_lock(&mm->context.lock);
> - size = mm->context.size * LDT_ENTRY_SIZE;
> + size = mm->context.ldt->size * LDT_ENTRY_SIZE;
> if (size > bytecount)
> size = bytecount;
>
> - err = 0;
> - if (copy_to_user(ptr, mm->context.ldt, size))
> - err = -EFAULT;
> - mutex_unlock(&mm->context.lock);
> - if (err < 0)
> - goto error_return;
> + if (copy_to_user(ptr, mm->context.ldt->entries, size)) {
> + retval = -EFAULT;
> + goto out_unlock;
> + }
> +
> if (size != bytecount) {
> - /* zero-fill the rest */
> + /* Zero-fill the rest and pretend we read bytecount bytes. */
> if (clear_user(ptr + size, bytecount - size) != 0) {

Make that:

if (clear_user(ptr + size, bytecount - size))

> - err = -EFAULT;
> - goto error_return;
> + retval = -EFAULT;
> + goto out_unlock;
> }
> }
> - return bytecount;
> -error_return:
> - return err;
> + retval = bytecount;
> +
> +out_unlock:
> + mutex_unlock(&mm->context.lock);
> + return retval;
> }
>
> static int read_default_ldt(void __user *ptr, unsigned long bytecount)
> @@ -195,6 +210,8 @@ static int write_ldt(void __user *ptr, unsigned long bytecount, int oldmode)
> struct desc_struct ldt;
> int error;
> struct user_desc ldt_info;
> + int oldsize, newsize;
> + struct ldt_struct *new_ldt, *old_ldt;
>
> error = -EINVAL;
> if (bytecount != sizeof(ldt_info))
> @@ -213,34 +230,43 @@ static int write_ldt(void __user *ptr, unsigned long bytecount, int oldmode)
> goto out;
> }
>
> - mutex_lock(&mm->context.lock);
> - if (ldt_info.entry_number >= mm->context.size) {
> - error = alloc_ldt(&current->mm->context,
> - ldt_info.entry_number + 1, 1);
> - if (error < 0)
> - goto out_unlock;
> - }
> -
> - /* Allow LDTs to be cleared by the user. */
> - if (ldt_info.base_addr == 0 && ldt_info.limit == 0) {
> - if (oldmode || LDT_empty(&ldt_info)) {
> - memset(&ldt, 0, sizeof(ldt));
> - goto install;
> + if ((oldmode && ldt_info.base_addr == 0 && ldt_info.limit == 0) ||

Shorten:

if ((oldmode && !ldt_info.base_addr && !ldt_info.limit) ||

> + LDT_empty(&ldt_info)) {
> + /* The user wants to clear the entry. */
> + memset(&ldt, 0, sizeof(ldt));
> + } else {
> + if (!IS_ENABLED(CONFIG_X86_16BIT) && !ldt_info.seg_32bit) {
> + error = -EINVAL;
> + goto out;
> }
> +
> + fill_ldt(&ldt, &ldt_info);
> + if (oldmode)
> + ldt.avl = 0;
> }
>
> - if (!IS_ENABLED(CONFIG_X86_16BIT) && !ldt_info.seg_32bit) {
> - error = -EINVAL;
> + mutex_lock(&mm->context.lock);
> +
> + old_ldt = mm->context.ldt;
> + oldsize = old_ldt ? old_ldt->size : 0;
> + newsize = max((int)(ldt_info.entry_number + 1), oldsize);
> +
> + error = -ENOMEM;
> + new_ldt = alloc_ldt_struct(newsize);
> + if (!new_ldt)
> goto out_unlock;
> - }
>
> - fill_ldt(&ldt, &ldt_info);
> - if (oldmode)
> - ldt.avl = 0;
> + if (old_ldt) {
> + memcpy(new_ldt->entries, old_ldt->entries,
> + oldsize * LDT_ENTRY_SIZE);
> + }

Single if-statement doesn't need {} and you don't absolutely need to
keep 80cols. Just let it stick out.

--
Regards/Gruss,
Boris.

ECO tip #101: Trim your mails when you reply.
--
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/