Re: [PATCH] ppc64: Fix possible race with set_pte on a present PTE

From: Andrea Arcangeli
Date: Tue May 25 2004 - 16:50:10 EST


In the previous email I attached a slightly older version of the patch,
one that didn't yet optimize the spurious TLB flush away from do_wp_page;
this is the latest version I tested:

Index: linux-2.5/include/asm-alpha/pgtable.h
===================================================================
RCS file: /home/andrea/crypto/cvs/linux-2.5/include/asm-alpha/pgtable.h,v
retrieving revision 1.22
diff -u -p -r1.22 pgtable.h
--- linux-2.5/include/asm-alpha/pgtable.h 23 May 2004 05:02:25 -0000 1.22
+++ linux-2.5/include/asm-alpha/pgtable.h 25 May 2004 20:34:11 -0000
@@ -267,6 +267,28 @@ extern inline pte_t pte_mkexec(pte_t pte
extern inline pte_t pte_mkdirty(pte_t pte) { pte_val(pte) |= __DIRTY_BITS; return pte; }
extern inline pte_t pte_mkyoung(pte_t pte) { pte_val(pte) |= __ACCESS_BITS; return pte; }

+static inline void ptep_handle_young_page_fault(pte_t *ptep)
+{
+ /*
+ * WARNING: this is safe only because the dirty bit
+ * cannot change transparently in hardware in the pte.
+ * If the dirty bit in the pte could be set transparently
+ * by the CPU this should be implemented with set_bit()
+ * or with a new set_mask64() implementing an atomic "or".
+ */
+ set_pte(ptep, pte_mkyoung(*ptep));
+}
+
+static inline void ptep_handle_dirty_page_fault(pte_t *ptep)
+{
+ /*
+ * This can run without atomic instructions even if
+ * the young bit is set in hardware by the architecture.
+ * Losing the young bit is not important.
+ */
+ set_pte(ptep, pte_mkdirty(*ptep));
+}
+
#define PAGE_DIR_OFFSET(tsk,address) pgd_offset((tsk),(address))

/* to find an entry in a kernel page-table-directory */
Index: linux-2.5/include/asm-generic/pgtable.h
===================================================================
RCS file: /home/andrea/crypto/cvs/linux-2.5/include/asm-generic/pgtable.h,v
retrieving revision 1.4
diff -u -p -r1.4 pgtable.h
--- linux-2.5/include/asm-generic/pgtable.h 19 Jan 2004 18:43:03 -0000 1.4
+++ linux-2.5/include/asm-generic/pgtable.h 25 May 2004 20:38:39 -0000
@@ -12,6 +12,7 @@
*/
#define ptep_establish(__vma, __address, __ptep, __entry) \
do { \
+ BUG_ON(!pte_dirty(__entry)); \
set_pte(__ptep, __entry); \
flush_tlb_page(__vma, __address); \
} while (0)
Index: linux-2.5/include/asm-i386/pgtable.h
===================================================================
RCS file: /home/andrea/crypto/cvs/linux-2.5/include/asm-i386/pgtable.h,v
retrieving revision 1.42
diff -u -p -r1.42 pgtable.h
--- linux-2.5/include/asm-i386/pgtable.h 23 May 2004 05:02:25 -0000 1.42
+++ linux-2.5/include/asm-i386/pgtable.h 25 May 2004 20:27:00 -0000
@@ -220,7 +220,19 @@ static inline pte_t pte_mkdirty(pte_t pt
static inline pte_t pte_mkyoung(pte_t pte) { (pte).pte_low |= _PAGE_ACCESSED; return pte; }
static inline pte_t pte_mkwrite(pte_t pte) { (pte).pte_low |= _PAGE_RW; return pte; }

+/*
+ * This is used only to set the dirty bit during a "dirty" page fault.
+ * Nothing to do on x86 since it's set by the hardware transparently
+ * and it cannot generate page faults.
+ */
+#define ptep_handle_dirty_page_fault(ptep) do { } while (0)
static inline int ptep_test_and_clear_dirty(pte_t *ptep) { return test_and_clear_bit(_PAGE_BIT_DIRTY, &ptep->pte_low); }
+/*
+ * This is used only to set the young bit during a "young" page fault.
+ * Nothing to do on x86 since it's set by the hardware transparently
+ * and it cannot generate page faults.
+ */
+#define ptep_handle_young_page_fault(ptep) do { } while (0)
static inline int ptep_test_and_clear_young(pte_t *ptep) { return test_and_clear_bit(_PAGE_BIT_ACCESSED, &ptep->pte_low); }
static inline void ptep_set_wrprotect(pte_t *ptep) { clear_bit(_PAGE_BIT_RW, &ptep->pte_low); }
static inline void ptep_mkdirty(pte_t *ptep) { set_bit(_PAGE_BIT_DIRTY, &ptep->pte_low); }
Index: linux-2.5/include/asm-x86_64/pgtable.h
===================================================================
RCS file: /home/andrea/crypto/cvs/linux-2.5/include/asm-x86_64/pgtable.h,v
retrieving revision 1.27
diff -u -p -r1.27 pgtable.h
--- linux-2.5/include/asm-x86_64/pgtable.h 23 May 2004 05:02:25 -0000 1.27
+++ linux-2.5/include/asm-x86_64/pgtable.h 25 May 2004 20:27:28 -0000
@@ -262,7 +262,19 @@ extern inline pte_t pte_mkexec(pte_t pte
extern inline pte_t pte_mkdirty(pte_t pte) { set_pte(&pte, __pte(pte_val(pte) | _PAGE_DIRTY)); return pte; }
extern inline pte_t pte_mkyoung(pte_t pte) { set_pte(&pte, __pte(pte_val(pte) | _PAGE_ACCESSED)); return pte; }
extern inline pte_t pte_mkwrite(pte_t pte) { set_pte(&pte, __pte(pte_val(pte) | _PAGE_RW)); return pte; }
+/*
+ * This is used only to set the dirty bit during a "dirty" page fault.
+ * Nothing to do on x86-64 since it's set by the hardware transparently
+ * and it cannot generate page faults.
+ */
+#define ptep_handle_dirty_page_fault(ptep) do { } while (0)
static inline int ptep_test_and_clear_dirty(pte_t *ptep) { return test_and_clear_bit(_PAGE_BIT_DIRTY, ptep); }
+/*
+ * This is used only to set the young bit during a "young" page fault.
+ * Nothing to do on x86-64 since it's set by the hardware transparently
+ * and it cannot generate page faults.
+ */
+#define ptep_handle_young_page_fault(ptep) do { } while (0)
static inline int ptep_test_and_clear_young(pte_t *ptep) { return test_and_clear_bit(_PAGE_BIT_ACCESSED, ptep); }
static inline void ptep_set_wrprotect(pte_t *ptep) { clear_bit(_PAGE_BIT_RW, ptep); }
static inline void ptep_mkdirty(pte_t *ptep) { set_bit(_PAGE_BIT_DIRTY, ptep); }
Index: linux-2.5/mm/memory.c
===================================================================
RCS file: /home/andrea/crypto/cvs/linux-2.5/mm/memory.c,v
retrieving revision 1.169
diff -u -p -r1.169 memory.c
--- linux-2.5/mm/memory.c 23 May 2004 05:10:11 -0000 1.169
+++ linux-2.5/mm/memory.c 25 May 2004 21:20:11 -0000
@@ -1056,7 +1056,7 @@ static int do_wp_page(struct mm_struct *
flush_cache_page(vma, address);
entry = maybe_mkwrite(pte_mkyoung(pte_mkdirty(pte)),
vma);
- ptep_establish(vma, address, page_table, entry);
+ set_pte(page_table, entry);
update_mmu_cache(vma, address, entry);
pte_unmap(page_table);
spin_unlock(&mm->page_table_lock);
@@ -1643,10 +1643,9 @@ static inline int handle_pte_fault(struc
if (!pte_write(entry))
return do_wp_page(mm, vma, address, pte, pmd, entry);

- entry = pte_mkdirty(entry);
+ ptep_handle_dirty_page_fault(pte);
}
- entry = pte_mkyoung(entry);
- ptep_establish(vma, address, pte, entry);
+ ptep_handle_young_page_fault(pte);
update_mmu_cache(vma, address, entry);
pte_unmap(pte);
spin_unlock(&mm->page_table_lock);
Signed-off-by: Andrea Arcangeli <andrea@xxxxxxx>
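
A side note, not part of the patch above: the WARNING in the asm-alpha
comment points out that the plain set_pte() read-modify-write is safe
there only because the alpha CPU never sets the dirty bit in the pte
transparently. On a hypothetical architecture where the hardware could
dirty the pte behind our back, the young-bit update would have to be an
atomic "or" so that a concurrent hardware dirty-bit update cannot be
lost. A minimal sketch of what such a variant could look like, assuming
(for illustration only) that the accessed bit has a bit number
_PAGE_BIT_ACCESSED and that the pte fits in a single machine word:

static inline void ptep_handle_young_page_fault(pte_t *ptep)
{
	/*
	 * Atomic "or" of the accessed bit: a dirty bit set by the
	 * CPU between our read and our write cannot be lost, unlike
	 * with the non-atomic set_pte(ptep, pte_mkyoung(*ptep)).
	 */
	set_bit(_PAGE_BIT_ACCESSED, (unsigned long *) ptep);
}

On x86 and x86-64 the definitions can stay empty, as the comments in the
patch say, because there the hardware sets both the accessed and the
dirty bits itself and those bits never generate page faults.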