Re: [PATCH] [0/6] HUGETLB memory commitment

From: Andy Whitcroft
Date: Thu Mar 25 2004 - 21:04:25 EST



--On 25 March 2004 23:59 +0000 Andy Whitcroft <apw@xxxxxxxxxxxx> wrote:

--On 25 March 2004 15:51 -0800 Andrew Morton <akpm@xxxxxxxx> wrote:

I think it's simply:

- Make normal overcommit logic skip hugepages completely

- Teach the overcommit_memory=2 logic that hugepages are basically
"pinned", so subtract them from the arithmetic.

And that's it. The hugepages are semantically quite different from
normal memory (prefaulted, preallocated, unswappable) and we've
deliberately avoided pretending otherwise.

Attached is a ground-up patch, trying just to cure the overcommit bug. The main thrust is to ensure that VM_ACCOUNT actually only gets set on vmas which are indeed accountable. With that ensured, much of the rest comes out in the wash. It also subtracts the hugetlb memory from the total used for the overcommit_memory=2 calculation.

Attached are two patches, core and arch changes. They have been compile tested on i386 and appear to work. Is that more what you had in mind?

-apw---
i386/mm/hugetlbpage.c | 6 ++++++
ia64/mm/hugetlbpage.c | 6 ++++++
ppc64/mm/hugetlbpage.c | 6 ++++++
sparc64/mm/hugetlbpage.c | 6 ++++++
4 files changed, 24 insertions(+)

diff -upN reference/arch/i386/mm/hugetlbpage.c current/arch/i386/mm/hugetlbpage.c
--- reference/arch/i386/mm/hugetlbpage.c 2004-01-09 07:00:02.000000000 +0000
+++ current/arch/i386/mm/hugetlbpage.c 2004-03-26 02:08:46.000000000 +0000
@@ -527,6 +527,12 @@ int is_hugepage_mem_enough(size_t size)
return (size + ~HPAGE_MASK)/HPAGE_SIZE <= htlbpagemem;
}

+/* Return the number of pages of memory we physically have, in PAGE_SIZE units. */
+unsigned long hugetlb_total_pages(void)
+{
+ return htlbzone_pages * (HPAGE_SIZE / PAGE_SIZE);
+}
+
/*
* We cannot handle pagefaults against hugetlb pages at all. They cause
* handle_mm_fault() to try to instantiate regular-sized pages in the
diff -upN reference/arch/ia64/mm/hugetlbpage.c current/arch/ia64/mm/hugetlbpage.c
--- reference/arch/ia64/mm/hugetlbpage.c 2004-03-11 20:47:12.000000000 +0000
+++ current/arch/ia64/mm/hugetlbpage.c 2004-03-26 02:08:46.000000000 +0000
@@ -592,6 +592,12 @@ int is_hugepage_mem_enough(size_t size)
return 1;
}

+/* Return the number of pages of memory we physically have, in PAGE_SIZE units. */
+unsigned long hugetlb_total_pages(void)
+{
+ return htlbzone_pages * (HPAGE_SIZE / PAGE_SIZE);
+}
+
static struct page *hugetlb_nopage(struct vm_area_struct * area, unsigned long address, int *unused)
{
BUG();
diff -upN reference/arch/ppc64/mm/hugetlbpage.c current/arch/ppc64/mm/hugetlbpage.c
--- reference/arch/ppc64/mm/hugetlbpage.c 2004-03-11 20:47:14.000000000 +0000
+++ current/arch/ppc64/mm/hugetlbpage.c 2004-03-26 02:08:46.000000000 +0000
@@ -912,6 +912,12 @@ int is_hugepage_mem_enough(size_t size)
return (size + ~HPAGE_MASK)/HPAGE_SIZE <= htlbpage_free;
}

+/* Return the number of pages of memory we physically have, in PAGE_SIZE units. */
+unsigned long hugetlb_total_pages(void)
+{
+ return htlbpage_total * (HPAGE_SIZE / PAGE_SIZE);
+}
+
/*
* We cannot handle pagefaults against hugetlb pages at all. They cause
* handle_mm_fault() to try to instantiate regular-sized pages in the
diff -upN reference/arch/sparc64/mm/hugetlbpage.c current/arch/sparc64/mm/hugetlbpage.c
--- reference/arch/sparc64/mm/hugetlbpage.c 2004-01-09 06:59:45.000000000 +0000
+++ current/arch/sparc64/mm/hugetlbpage.c 2004-03-26 02:08:46.000000000 +0000
@@ -497,6 +497,12 @@ int is_hugepage_mem_enough(size_t size)
return (size + ~HPAGE_MASK)/HPAGE_SIZE <= htlbpagemem;
}

+/* Return the number of pages of memory we physically have, in PAGE_SIZE units. */
+unsigned long hugetlb_total_pages(void)
+{
+ return htlbzone_pages * (HPAGE_SIZE / PAGE_SIZE);
+}
+
/*
* We cannot handle pagefaults against hugetlb pages at all. They cause
* handle_mm_fault() to try to instantiate regular-sized pages in the
---
include/linux/hugetlb.h | 5 +++++
include/linux/mm.h | 3 +++
mm/mmap.c | 7 ++++++-
mm/mprotect.c | 3 ++-
security/commoncap.c | 4 +++-
security/dummy.c | 4 +++-
security/selinux/hooks.c | 4 +++-
7 files changed, 25 insertions(+), 5 deletions(-)

diff -X /home/apw/lib/vdiff.excl -rupN reference/include/linux/hugetlb.h current/include/linux/hugetlb.h
--- reference/include/linux/hugetlb.h 2004-02-23 18:15:09.000000000 +0000
+++ current/include/linux/hugetlb.h 2004-03-26 02:08:46.000000000 +0000
@@ -19,6 +19,7 @@ int hugetlb_prefault(struct address_spac
void huge_page_release(struct page *);
int hugetlb_report_meminfo(char *);
int is_hugepage_mem_enough(size_t);
+unsigned long hugetlb_total_pages(void);
struct page *follow_huge_addr(struct mm_struct *mm, struct vm_area_struct *vma,
unsigned long address, int write);
struct vm_area_struct *hugepage_vma(struct mm_struct *mm,
@@ -48,6 +49,10 @@ static inline int is_vm_hugetlb_page(str
{
return 0;
}
+static inline unsigned long hugetlb_total_pages(void)
+{
+ return 0;
+}

#define follow_hugetlb_page(m,v,p,vs,a,b,i) ({ BUG(); 0; })
#define follow_huge_addr(mm, vma, addr, write) 0
diff -X /home/apw/lib/vdiff.excl -rupN reference/include/linux/mm.h current/include/linux/mm.h
--- reference/include/linux/mm.h 2004-03-25 02:43:39.000000000 +0000
+++ current/include/linux/mm.h 2004-03-26 02:08:46.000000000 +0000
@@ -112,6 +112,9 @@ struct vm_area_struct {
#define VM_HUGETLB 0x00400000 /* Huge TLB Page VM */
#define VM_NONLINEAR 0x00800000 /* Is non-linear (remap_file_pages) */

+/* True if VM_ACCOUNT may be applied to this vma; hugetlb vmas are never accounted. */
+#define VM_MAYACCT(vma) (!((vma)->vm_flags & VM_HUGETLB))
+
#ifndef VM_STACK_DEFAULT_FLAGS /* arch can override this */
#define VM_STACK_DEFAULT_FLAGS VM_DATA_DEFAULT_FLAGS
#endif
diff -X /home/apw/lib/vdiff.excl -rupN reference/mm/mmap.c current/mm/mmap.c
--- reference/mm/mmap.c 2004-03-25 02:43:43.000000000 +0000
+++ current/mm/mmap.c 2004-03-26 02:08:46.000000000 +0000
@@ -489,9 +489,13 @@ unsigned long do_mmap_pgoff(struct file
int correct_wcount = 0;
int error;
struct rb_node ** rb_link, * rb_parent;
+ int accountable = 1;
unsigned long charged = 0;

if (file) {
+ if (is_file_hugepages(file))
+ accountable = 0;
+
if (!file->f_op || !file->f_op->mmap)
return -ENODEV;

@@ -608,7 +612,8 @@ munmap_back:
> current->rlim[RLIMIT_AS].rlim_cur)
return -ENOMEM;

- if (!(flags & MAP_NORESERVE) || sysctl_overcommit_memory > 1) {
+ if (accountable && (!(flags & MAP_NORESERVE) ||
+ sysctl_overcommit_memory > 1)) {
if (vm_flags & VM_SHARED) {
/* Check memory availability in shmem_file_setup? */
vm_flags |= VM_ACCOUNT;
diff -X /home/apw/lib/vdiff.excl -rupN reference/mm/mprotect.c current/mm/mprotect.c
--- reference/mm/mprotect.c 2004-03-26 02:08:48.000000000 +0000
+++ current/mm/mprotect.c 2004-03-26 02:08:46.000000000 +0000
@@ -173,7 +173,8 @@ mprotect_fixup(struct vm_area_struct *vm
* a MAP_NORESERVE private mapping to writable will now reserve.
*/
if (newflags & VM_WRITE) {
- if (!(vma->vm_flags & (VM_ACCOUNT|VM_WRITE|VM_SHARED))) {
+ if (!(vma->vm_flags & (VM_ACCOUNT|VM_WRITE|VM_SHARED))
+ && VM_MAYACCT(vma)) {
charged = (end - start) >> PAGE_SHIFT;
if (security_vm_enough_memory(charged))
return -ENOMEM;
diff -X /home/apw/lib/vdiff.excl -rupN reference/security/commoncap.c current/security/commoncap.c
--- reference/security/commoncap.c 2004-03-25 02:43:44.000000000 +0000
+++ current/security/commoncap.c 2004-03-26 02:22:03.000000000 +0000
@@ -22,6 +22,7 @@
#include <linux/netlink.h>
#include <linux/ptrace.h>
#include <linux/xattr.h>
+#include <linux/hugetlb.h>

int cap_capable (struct task_struct *tsk, int cap)
{
@@ -358,7 +359,8 @@ int cap_vm_enough_memory(long pages)
return -ENOMEM;
}

- allowed = totalram_pages * sysctl_overcommit_ratio / 100;
+ allowed = (totalram_pages - hugetlb_total_pages())
+ * sysctl_overcommit_ratio / 100;
allowed += total_swap_pages;

if (atomic_read(&vm_committed_space) < allowed)
diff -X /home/apw/lib/vdiff.excl -rupN reference/security/dummy.c current/security/dummy.c
--- reference/security/dummy.c 2004-03-25 02:43:44.000000000 +0000
+++ current/security/dummy.c 2004-03-26 02:22:12.000000000 +0000
@@ -25,6 +25,7 @@
#include <linux/netlink.h>
#include <net/sock.h>
#include <linux/xattr.h>
+#include <linux/hugetlb.h>

static int dummy_ptrace (struct task_struct *parent, struct task_struct *child)
{
@@ -146,7 +147,8 @@ static int dummy_vm_enough_memory(long p
return -ENOMEM;
}

- allowed = totalram_pages * sysctl_overcommit_ratio / 100;
+ allowed = (totalram_pages - hugetlb_total_pages())
+ * sysctl_overcommit_ratio / 100;
allowed += total_swap_pages;

if (atomic_read(&vm_committed_space) < allowed)
diff -X /home/apw/lib/vdiff.excl -rupN reference/security/selinux/hooks.c current/security/selinux/hooks.c
--- reference/security/selinux/hooks.c 2004-03-25 02:43:44.000000000 +0000
+++ current/security/selinux/hooks.c 2004-03-26 02:22:24.000000000 +0000
@@ -59,6 +59,7 @@
#include <net/af_unix.h> /* for Unix socket types */
#include <linux/parser.h>
#include <linux/nfs_mount.h>
+#include <linux/hugetlb.h>

#include "avc.h"
#include "objsec.h"
@@ -1544,7 +1545,8 @@ static int selinux_vm_enough_memory(long
return -ENOMEM;
}

- allowed = totalram_pages * sysctl_overcommit_ratio / 100;
+ allowed = (totalram_pages - hugetlb_total_pages())
+ * sysctl_overcommit_ratio / 100;
allowed += total_swap_pages;

if (atomic_read(&vm_committed_space) < allowed)