[RFC][PATCH] s390/mm: fix mis-accounting of pgtable_bytes

From: Martin Schwidefsky
Date: Fri Oct 12 2018 - 10:32:29 EST


In case a fork or a clone system fails in copy_process and the error
handling does the mmput() at the bad_fork_cleanup_mm label, the following
warning messages will appear on the console:

BUG: non-zero pgtables_bytes on freeing mm: 16384

The reason for that is the tricks we play with mm_inc_nr_puds() and
mm_inc_nr_pmds() in init_new_context().

A normal 64-bit process has 3 levels of page table, the p4d level and
the pud level are folded. On process termination the free_pud_range()
function in mm/memory.c will subtract 16KB from pgtable_bytes with a
mm_dec_nr_puds() call, but there actually is not really a pud table.
The s390 version of pud_free_tlb() recognized this an does nothing,
the region-3 table will be freed with the pgd_free() call later on.
But the mm_dec_nr_puds() is done unconditionally, to counter act this
the init_new_context() function has an extra mm_inc_nr_puds() call.

Now with a failed fork or clone the free_pgtables() function is not
called, there is no mm_dec_nr_puds() but the mm_inc_nr_puds() has
been done which leads to the incorrect pgtable_bytes of 16384.
Nothing is broken by this, but the warning is annoying.

To get rid of the warning drop the mm_inc_nr_pmds() & mm_inc_nr_puds()
calls from init_new_context(), introduce the mm_pmd_folded(),
pmd_pud_folded() and pmd_p4d_folded() helper, and add if-statements
to the functions mm_[inc|dec]_nr_[pmds|puds].

Signed-off-by: Martin Schwidefsky <schwidefsky@xxxxxxxxxx>
---
arch/s390/include/asm/mmu_context.h | 5 -----
arch/s390/include/asm/pgalloc.h | 6 ++---
arch/s390/include/asm/pgtable.h | 18 +++++++++++++++
arch/s390/include/asm/tlb.h | 6 ++---
include/linux/mm.h | 44 ++++++++++++++++++++++++++++++++-----
5 files changed, 62 insertions(+), 17 deletions(-)

diff --git a/arch/s390/include/asm/mmu_context.h b/arch/s390/include/asm/mmu_context.h
index dbd689d556ce..ccbb53e22024 100644
--- a/arch/s390/include/asm/mmu_context.h
+++ b/arch/s390/include/asm/mmu_context.h
@@ -46,8 +46,6 @@ static inline int init_new_context(struct task_struct *tsk,
mm->context.asce_limit = STACK_TOP_MAX;
mm->context.asce = __pa(mm->pgd) | _ASCE_TABLE_LENGTH |
_ASCE_USER_BITS | _ASCE_TYPE_REGION3;
- /* pgd_alloc() did not account this pud */
- mm_inc_nr_puds(mm);
break;
case -PAGE_SIZE:
/* forked 5-level task, set new asce with new_mm->pgd */
@@ -63,9 +61,6 @@ static inline int init_new_context(struct task_struct *tsk,
/* forked 2-level compat task, set new asce with new mm->pgd */
mm->context.asce = __pa(mm->pgd) | _ASCE_TABLE_LENGTH |
_ASCE_USER_BITS | _ASCE_TYPE_SEGMENT;
- /* pgd_alloc() did not account this pmd */
- mm_inc_nr_pmds(mm);
- mm_inc_nr_puds(mm);
}
crst_table_init((unsigned long *) mm->pgd, pgd_entry_type(mm));
return 0;
diff --git a/arch/s390/include/asm/pgalloc.h b/arch/s390/include/asm/pgalloc.h
index f0f9bcf94c03..5ee733720a57 100644
--- a/arch/s390/include/asm/pgalloc.h
+++ b/arch/s390/include/asm/pgalloc.h
@@ -36,11 +36,11 @@ static inline void crst_table_init(unsigned long *crst, unsigned long entry)

static inline unsigned long pgd_entry_type(struct mm_struct *mm)
{
- if (mm->context.asce_limit <= _REGION3_SIZE)
+ if (mm_pmd_folded(mm))
return _SEGMENT_ENTRY_EMPTY;
- if (mm->context.asce_limit <= _REGION2_SIZE)
+ if (mm_pud_folded(mm))
return _REGION3_ENTRY_EMPTY;
- if (mm->context.asce_limit <= _REGION1_SIZE)
+ if (mm_p4d_folded(mm))
return _REGION2_ENTRY_EMPTY;
return _REGION1_ENTRY_EMPTY;
}
diff --git a/arch/s390/include/asm/pgtable.h b/arch/s390/include/asm/pgtable.h
index 411d435e7a7d..063732414dfb 100644
--- a/arch/s390/include/asm/pgtable.h
+++ b/arch/s390/include/asm/pgtable.h
@@ -493,6 +493,24 @@ static inline int is_module_addr(void *addr)
_REGION_ENTRY_PROTECT | \
_REGION_ENTRY_NOEXEC)

+static inline bool mm_p4d_folded(struct mm_struct *mm)
+{
+ return mm->context.asce_limit <= _REGION1_SIZE;
+}
+#define mm_p4d_folded(mm) mm_p4d_folded(mm)
+
+static inline bool mm_pud_folded(struct mm_struct *mm)
+{
+ return mm->context.asce_limit <= _REGION2_SIZE;
+}
+#define mm_pud_folded(mm) mm_pud_folded(mm)
+
+static inline bool mm_pmd_folded(struct mm_struct *mm)
+{
+ return mm->context.asce_limit <= _REGION3_SIZE;
+}
+#define mm_pmd_folded(mm) mm_pmd_folded(mm)
+
static inline int mm_has_pgste(struct mm_struct *mm)
{
#ifdef CONFIG_PGSTE
diff --git a/arch/s390/include/asm/tlb.h b/arch/s390/include/asm/tlb.h
index 457b7ba0fbb6..b31c779cf581 100644
--- a/arch/s390/include/asm/tlb.h
+++ b/arch/s390/include/asm/tlb.h
@@ -136,7 +136,7 @@ static inline void pte_free_tlb(struct mmu_gather *tlb, pgtable_t pte,
static inline void pmd_free_tlb(struct mmu_gather *tlb, pmd_t *pmd,
unsigned long address)
{
- if (tlb->mm->context.asce_limit <= _REGION3_SIZE)
+ if (mm_pmd_folded(tlb->mm))
return;
pgtable_pmd_page_dtor(virt_to_page(pmd));
tlb_remove_table(tlb, pmd);
@@ -152,7 +152,7 @@ static inline void pmd_free_tlb(struct mmu_gather *tlb, pmd_t *pmd,
static inline void p4d_free_tlb(struct mmu_gather *tlb, p4d_t *p4d,
unsigned long address)
{
- if (tlb->mm->context.asce_limit <= _REGION1_SIZE)
+ if (mm_p4d_folded(tlb->mm))
return;
tlb_remove_table(tlb, p4d);
}
@@ -167,7 +167,7 @@ static inline void p4d_free_tlb(struct mmu_gather *tlb, p4d_t *p4d,
static inline void pud_free_tlb(struct mmu_gather *tlb, pud_t *pud,
unsigned long address)
{
- if (tlb->mm->context.asce_limit <= _REGION2_SIZE)
+ if (mm_pud_folded(tlb->mm))
return;
tlb_remove_table(tlb, pud);
}
diff --git a/include/linux/mm.h b/include/linux/mm.h
index 0416a7204be3..1e4a045f19ec 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -105,6 +105,34 @@ extern int mmap_rnd_compat_bits __read_mostly;
#define mm_zero_struct_page(pp) ((void)memset((pp), 0, sizeof(struct page)))
#endif

+/*
+ * On some architectures it depends on the mm if the p4d/pud or pmd
+ * layer of the page table hierarchy is folded or not.
+ */
+#ifndef mm_p4d_folded
+#define mm_p4d_folded(mm) mm_p4d_folded(mm)
+static inline bool mm_p4d_folded(struct mm_struct *mm)
+{
+ return __is_defined(__PAGETABLE_P4D_FOLDED);
+}
+#endif
+
+#ifndef mm_pud_folded
+#define mm_pud_folded(mm) mm_pud_folded(mm)
+static inline bool mm_pud_folded(struct mm_struct *mm)
+{
+ return __is_defined(__PAGETABLE_PUD_FOLDED);
+}
+#endif
+
+#ifndef mm_pmd_folded
+#define mm_pmd_folded(mm) mm_pmd_folded(mm)
+static inline bool mm_pmd_folded(struct mm_struct *mm)
+{
+ return __is_defined(__PAGETABLE_PMD_FOLDED);
+}
+#endif
+
/*
* Default maximum number of active map areas, this limits the number of vmas
* per mm struct. Users can overwrite this number by sysctl but there is a
@@ -1710,7 +1738,7 @@ static inline int __p4d_alloc(struct mm_struct *mm, pgd_t *pgd,
int __p4d_alloc(struct mm_struct *mm, pgd_t *pgd, unsigned long address);
#endif

-#if defined(__PAGETABLE_PUD_FOLDED) || !defined(CONFIG_MMU)
+#if !defined(CONFIG_MMU)
static inline int __pud_alloc(struct mm_struct *mm, p4d_t *p4d,
unsigned long address)
{
@@ -1724,16 +1752,18 @@ int __pud_alloc(struct mm_struct *mm, p4d_t *p4d, unsigned long address);

static inline void mm_inc_nr_puds(struct mm_struct *mm)
{
- atomic_long_add(PTRS_PER_PUD * sizeof(pud_t), &mm->pgtables_bytes);
+ if (!mm_pud_folded(mm))
+ atomic_long_add(PTRS_PER_PUD * sizeof(pud_t), &mm->pgtables_bytes);
}

static inline void mm_dec_nr_puds(struct mm_struct *mm)
{
- atomic_long_sub(PTRS_PER_PUD * sizeof(pud_t), &mm->pgtables_bytes);
+ if (!mm_pud_folded(mm))
+ atomic_long_sub(PTRS_PER_PUD * sizeof(pud_t), &mm->pgtables_bytes);
}
#endif

-#if defined(__PAGETABLE_PMD_FOLDED) || !defined(CONFIG_MMU)
+#if !defined(CONFIG_MMU)
static inline int __pmd_alloc(struct mm_struct *mm, pud_t *pud,
unsigned long address)
{
@@ -1748,12 +1778,14 @@ int __pmd_alloc(struct mm_struct *mm, pud_t *pud, unsigned long address);

static inline void mm_inc_nr_pmds(struct mm_struct *mm)
{
- atomic_long_add(PTRS_PER_PMD * sizeof(pmd_t), &mm->pgtables_bytes);
+ if (!mm_pmd_folded(mm))
+ atomic_long_add(PTRS_PER_PMD * sizeof(pmd_t), &mm->pgtables_bytes);
}

static inline void mm_dec_nr_pmds(struct mm_struct *mm)
{
- atomic_long_sub(PTRS_PER_PMD * sizeof(pmd_t), &mm->pgtables_bytes);
+ if (!mm_pmd_folded(mm))
+ atomic_long_sub(PTRS_PER_PMD * sizeof(pmd_t), &mm->pgtables_bytes);
}
#endif

--
2.16.4


--
blue skies,
Martin.

"Reality continues to ruin my life." - Calvin.