mm: mm, mmap: only apply a one page gap betwen the stack an MAP_FIXED

From: Willy Tarreau
Date: Thu Jul 06 2017 - 06:00:54 EST


Some programs place a MAP_FIXED below the stack, not leaving enough room
for the stack guard. This patch keeps track of MAP_FIXED, mirroring it in
a new VM_FIXED flag and reduces the stack guard to a single page (as it
used to be) in such a situation, assuming that when an application places
a fixed map close to the stack, it very likely does it on purpose and is
taking the full responsibility for the risk of the stack blowing up.

Cc: Ben Hutchings <ben@xxxxxxxxxxxxxxx>
Cc: Michal Hocko <mhocko@xxxxxxxx>
Cc: Kees Cook <keescook@xxxxxxxxxxxx>
Cc: Andy Lutomirski <luto@xxxxxxxxxx>
Signed-off-by: Willy Tarreau <w@xxxxxx>
---
include/linux/mm.h | 1 +
include/linux/mman.h | 1 +
mm/mmap.c | 30 ++++++++++++++++++++----------
3 files changed, 22 insertions(+), 10 deletions(-)

diff --git a/include/linux/mm.h b/include/linux/mm.h
index 6f543a4..41492b9 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -188,6 +188,7 @@ extern int overcommit_kbytes_handler(struct ctl_table *, int, void __user *,
#define VM_ACCOUNT 0x00100000 /* Is a VM accounted object */
#define VM_NORESERVE 0x00200000 /* should the VM suppress accounting */
#define VM_HUGETLB 0x00400000 /* Huge TLB Page VM */
+#define VM_FIXED 0x00800000 /* MAP_FIXED was used */
#define VM_ARCH_1 0x01000000 /* Architecture-specific flag */
#define VM_ARCH_2 0x02000000
#define VM_DONTDUMP 0x04000000 /* Do not include in the core dump */
diff --git a/include/linux/mman.h b/include/linux/mman.h
index 634c4c5..3a29069 100644
--- a/include/linux/mman.h
+++ b/include/linux/mman.h
@@ -86,6 +86,7 @@ static inline bool arch_validate_prot(unsigned long prot)
{
return _calc_vm_trans(flags, MAP_GROWSDOWN, VM_GROWSDOWN ) |
_calc_vm_trans(flags, MAP_DENYWRITE, VM_DENYWRITE ) |
+ _calc_vm_trans(flags, MAP_FIXED, VM_FIXED ) |
_calc_vm_trans(flags, MAP_LOCKED, VM_LOCKED );
}

diff --git a/mm/mmap.c b/mm/mmap.c
index ece0f6d..7fc1c29 100644
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -2244,12 +2244,17 @@ int expand_upwards(struct vm_area_struct *vma, unsigned long address)
gap_addr = TASK_SIZE;

next = vma->vm_next;
+
+ /* PROT_NONE above a MAP_GROWSUP always serves as a mark and inhibits
+ * the stack guard gap.
+ * MAP_FIXED above a MAP_GROWSUP only requires a single page guard.
+ */
if (next && next->vm_start < gap_addr &&
- (next->vm_flags & (VM_WRITE|VM_READ|VM_EXEC))) {
- if (!(next->vm_flags & VM_GROWSUP))
- return -ENOMEM;
- /* Check that both stack segments have the same anon_vma? */
- }
+ !(next->vm_flags & VM_GROWSUP) &&
+ (next->vm_flags & (VM_WRITE|VM_READ|VM_EXEC)) &&
+ (!(next->vm_flags & VM_FIXED) ||
+ next->vm_start < address + PAGE_SIZE))
+ return -ENOMEM;

/* We must make sure the anon_vma is allocated. */
if (unlikely(anon_vma_prepare(vma)))
@@ -2329,12 +2334,17 @@ int expand_downwards(struct vm_area_struct *vma,
if (gap_addr > address)
return -ENOMEM;
prev = vma->vm_prev;
+
+ /* PROT_NONE below a MAP_GROWSDOWN always serves as a mark and inhibits
+ * the stack guard gap.
+ * MAP_FIXED below a MAP_GROWSDOWN only requires a single page guard.
+ */
if (prev && prev->vm_end > gap_addr &&
- (prev->vm_flags & (VM_WRITE|VM_READ|VM_EXEC))) {
- if (!(prev->vm_flags & VM_GROWSDOWN))
- return -ENOMEM;
- /* Check that both stack segments have the same anon_vma? */
- }
+ !(prev->vm_flags & VM_GROWSDOWN) &&
+ (prev->vm_flags & (VM_WRITE|VM_READ|VM_EXEC)) &&
+ (!(prev->vm_flags & VM_FIXED) ||
+ prev->vm_end > address - PAGE_SIZE))
+ return -ENOMEM;

/* We must make sure the anon_vma is allocated. */
if (unlikely(anon_vma_prepare(vma)))
--
1.7.12.1