[patch] i386, vdso=[0|1] boot option and /proc/sys/vm/vdso_enabled

From: Ingo Molnar
Date: Sat May 20 2006 - 04:53:16 EST



* Linus Torvalds <torvalds@xxxxxxxx> wrote:

> > Well that patch took a machine from working to non-working. Pretty serious
> > stuff. We should get to the bottom of the problem so we can assess the
> > risk and impact, no?
>
> Yes. And it would be good to have a way to turn it off - either
> globally of by some per-process setup (eg off by default, but turn on
> when doing some magic).
>
> The per-process one would be the harder one, because it would require
> the fixmap entry, but not globally. So I suspect the only practical
> thing would be to have it be a kernel boot-time option.

below is a patch that adds the vdso=0 boot option from exec-shield and
the /proc/sys/vm/vdso_enabled per-system sysctl.

Andrew, could you try this - do newly started processes work fine if you
re-enable the vdso after booting with vdso=0? That could tell us whether
it's an init bug or a glibc bug.

Ingo

--------
Subject: i386, vdso=[0|1] boot option and /proc/sys/vm/vdso_enabled
From: Ingo Molnar <mingo@xxxxxxx>

add the vdso=0 boot option and the /proc/sys/vm/vdso_enabled sysctl, on
i386. VDSO defaults to enabled. The runtime switch works fine for newly
started processes [it does not impact existing process images]:

# cat /proc/self/maps | grep vdso
b7f42000-b7f43000 r-xp b7f42000 00:00 0 [vdso]
# echo 0 > /proc/sys/vm/vdso_enabled
# cat /proc/self/maps | grep vdso
# echo 1 > /proc/sys/vm/vdso_enabled
# cat /proc/self/maps | grep vdso
b7f05000-b7f06000 r-xp b7f05000 00:00 0 [vdso]
#

Signed-off-by: Ingo Molnar <mingo@xxxxxxx>

---
arch/i386/kernel/sysenter.c | 21 +++++++++++++++++++++
include/linux/sysctl.h | 1 +
kernel/sysctl.c | 16 ++++++++++++++++
3 files changed, 38 insertions(+)

Index: linux/arch/i386/kernel/sysenter.c
===================================================================
--- linux.orig/arch/i386/kernel/sysenter.c
+++ linux/arch/i386/kernel/sysenter.c
@@ -22,6 +22,21 @@
#include <asm/pgtable.h>
#include <asm/unistd.h>

+/*
+ * Should the kernel map a VDSO page into processes and pass its
+ * address down to glibc upon exec()?
+ */
+unsigned int vdso_enabled = 1;
+
+static int __init vdso_setup(char *s)
+{
+ vdso_enabled = simple_strtoul(s, NULL, 0);
+
+ return 1;
+}
+
+__setup("vdso=", vdso_setup);
+
extern asmlinkage void sysenter_entry(void);

void enable_sep_cpu(void)
@@ -97,6 +112,9 @@ int arch_setup_additional_pages(struct l
unsigned long addr;
int ret;

+ if (unlikely(!vdso_enabled))
+ return 0;
+
down_write(&mm->mmap_sem);
addr = get_unmapped_area(NULL, 0, PAGE_SIZE, 0, 0);
if (IS_ERR_VALUE(addr)) {
@@ -122,16 +140,19 @@ int arch_setup_additional_pages(struct l
ret = insert_vm_struct(mm, vma);
if (ret)
goto free_vma;
+
current->mm->context.vdso = (void *)addr;
current_thread_info()->sysenter_return = SYSENTER_RETURN_OFFSET + addr;
mm->total_vm++;
up_write(&mm->mmap_sem);
+
return 0;

free_vma:
kmem_cache_free(vm_area_cachep, vma);
up_fail:
up_write(&mm->mmap_sem);
+
return ret;
}

Index: linux/include/linux/sysctl.h
===================================================================
--- linux.orig/include/linux/sysctl.h
+++ linux/include/linux/sysctl.h
@@ -186,6 +186,7 @@ enum
VM_PERCPU_PAGELIST_FRACTION=30,/* int: fraction of pages in each percpu_pagelist */
VM_ZONE_RECLAIM_MODE=31, /* reclaim local zone memory before going off node */
VM_ZONE_RECLAIM_INTERVAL=32, /* time period to wait after reclaim failure */
+ VM_VDSO_ENABLED=33, /* map VDSO into new processes? */
};


Index: linux/kernel/sysctl.c
===================================================================
--- linux.orig/kernel/sysctl.c
+++ linux/kernel/sysctl.c
@@ -158,6 +158,10 @@ extern ctl_table inotify_table[];
int sysctl_legacy_va_layout;
#endif

+#ifdef CONFIG_X86_32
+extern int vdso_enabled;
+#endif
+
/* /proc declarations: */

#ifdef CONFIG_PROC_FS
@@ -915,6 +919,18 @@ static ctl_table vm_table[] = {
.strategy = &sysctl_jiffies,
},
#endif
+#ifdef CONFIG_X86_32
+ {
+ .ctl_name = VM_VDSO_ENABLED,
+ .procname = "vdso_enabled",
+ .data = &vdso_enabled,
+ .maxlen = sizeof(vdso_enabled),
+ .mode = 0644,
+ .proc_handler = &proc_dointvec,
+ .strategy = &sysctl_intvec,
+ .extra1 = &zero,
+ },
+#endif
{ .ctl_name = 0 }
};

-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/