[RFC PATCH 31/31] sysctl: toggle to promote PUD-mapped 1GB THP or not.

From: Zi Yan
Date: Fri Feb 15 2019 - 17:10:05 EST


From: Zi Yan <ziy@xxxxxxxxxx>

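Replace the mem_defrag_promote_1gb_thp check with a new vm sysctl,
mem_defrag_promote_thp. It is a bitmask (0-15) that selects which
promotions kmem_defragd_scan_mm() performs:

0x8 - promote PTE-mapped THPs to PMD-mapped
0x4 - promote a defragged chunk to a PMD THP in place
0x2 - promote PMD-mapped THPs to PUD-mapped
0x1 - promote a defragged range to a PUD THP in place

Only promote PMD THPs by default (0x8|0x4).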

Signed-off-by: Zi Yan <ziy@xxxxxxxxxx>
---
kernel/sysctl.c | 11 +++++++++++
mm/mem_defrag.c | 17 +++++++++++++----
2 files changed, 24 insertions(+), 4 deletions(-)

diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index 762535a2c7d1..20263d2c39b9 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -121,6 +121,7 @@ extern int vma_scan_threshold_type;
extern int vma_no_repeat_defrag;
extern int num_breakout_chunks;
extern int defrag_size_threshold;
+extern int mem_defrag_promote_thp;

extern int only_print_head_pfn;

@@ -135,6 +136,7 @@ static int zero;
static int __maybe_unused one = 1;
static int __maybe_unused two = 2;
static int __maybe_unused four = 4;
+static int __maybe_unused fifteen = 15;
static unsigned long one_ul = 1;
static int one_hundred = 100;
static int one_thousand = 1000;
@@ -1761,6 +1763,15 @@ static struct ctl_table vm_table[] = {
.extra1 = &zero,
.extra2 = &one,
},
+ {
+ .procname = "mem_defrag_promote_thp",
+ .data = &mem_defrag_promote_thp,
+ .maxlen = sizeof(mem_defrag_promote_thp),
+ .mode = 0644,
+ .proc_handler = proc_dointvec_minmax,
+ .extra1 = &zero,
+ .extra2 = &fifteen,
+ },
{ }
};

diff --git a/mm/mem_defrag.c b/mm/mem_defrag.c
index d7a579924d12..7cfa99351925 100644
--- a/mm/mem_defrag.c
+++ b/mm/mem_defrag.c
@@ -64,12 +64,18 @@ enum {
VMA_THRESHOLD_TYPE_SIZE,
};

+#define PROMOTE_PMD_MAP (0x8) /* mem_defrag_promote_thp bit: allow promote_huge_pmd_address() (PTE-mapped THP -> PMD-mapped) */
+#define PROMOTE_PMD_PAGE (0x4) /* mem_defrag_promote_thp bit: allow in-place promotion of a fully defragged chunk to a PMD THP */
+#define PROMOTE_PUD_MAP (0x2) /* mem_defrag_promote_thp bit: allow promote_huge_pud_address() (PMD-mapped THP -> PUD-mapped) */
+#define PROMOTE_PUD_PAGE (0x1) /* mem_defrag_promote_thp bit: allow in-place promotion of a fully defragged range to a PUD THP */
+
int num_breakout_chunks;
int vma_scan_percentile = 100;
int vma_scan_threshold_type = VMA_THRESHOLD_TYPE_TIME;
int vma_no_repeat_defrag;
int kmem_defragd_always;
int defrag_size_threshold = 5;
+int mem_defrag_promote_thp = (PROMOTE_PMD_MAP|PROMOTE_PMD_PAGE);
static DEFINE_SPINLOCK(kmem_defragd_mm_lock);

#define MM_SLOTS_HASH_BITS 10
@@ -1613,7 +1619,8 @@ static int kmem_defragd_scan_mm(struct defrag_scan_control *sc)
/* defrag works for the whole chunk,
* promote to THP in place
*/
- if (!defrag_result &&
+ if ((mem_defrag_promote_thp & PROMOTE_PMD_PAGE) &&
+ !defrag_result &&
/* skip existing THPs */
defrag_stats.aligned_max_order < HPAGE_PMD_ORDER &&
!(*scan_address & (HPAGE_PMD_SIZE-1)) &&
@@ -1628,7 +1635,8 @@ static int kmem_defragd_scan_mm(struct defrag_scan_control *sc)
* still PTE pointed
*/
/* promote PTE-mapped THP to PMD-mapped */
- promote_huge_pmd_address(vma, *scan_address);
+ if (mem_defrag_promote_thp & PROMOTE_PMD_MAP)
+ promote_huge_pmd_address(vma, *scan_address);
}
up_write(&mm->mmap_sem);
}
@@ -1654,7 +1662,8 @@ static int kmem_defragd_scan_mm(struct defrag_scan_control *sc)
}

/* defrag works for the whole chunk, promote to PUD THP in place */
- if (!nr_fails_in_1gb_range &&
+ if ((mem_defrag_promote_thp & PROMOTE_PUD_PAGE) &&
+ !nr_fails_in_1gb_range &&
!skip_promotion && /* avoid existing THP */
!(defrag_begin & (HPAGE_PUD_SIZE-1)) &&
!(defrag_end & (HPAGE_PUD_SIZE-1))) {
@@ -1668,7 +1677,7 @@ static int kmem_defragd_scan_mm(struct defrag_scan_control *sc)
* still PMD pointed
*/
/* promote PMD-mapped THP to PUD-mapped */
- if (mem_defrag_promote_1gb_thp)
+ if (mem_defrag_promote_thp & PROMOTE_PUD_MAP)
promote_huge_pud_address(vma, defrag_begin);
}
up_write(&mm->mmap_sem);
--
2.20.1