[RFC v2 3/6] mm, migrc: Skip TLB flushes on CPUs where they have already been done

From: Byungchul Park
Date: Thu Aug 17 2023 - 04:10:10 EST


TLB flushes can be skipped if the requested flushes have already been
performed on a CPU for any reason, not necessarily because of migration.
Track this by recording a timestamp (= migrc_gen) both when a flush is
requested and when a flush is actually performed.
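
To illustrate the idea outside the kernel: a request takes a snapshot
of a global generation counter, every flush a CPU performs records the
generation it has satisfied, and a pending request can be skipped on
CPUs whose recorded generation has already caught up. A minimal C11
user-space sketch follows; the names (flush_gen, done_gen,
request_flush() and so on) are hypothetical and not part of this patch:

#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

#define NCPUS 4

static atomic_int flush_gen;    /* bumped whenever a flush is requested */
static int done_gen[NCPUS];     /* latest generation each CPU has flushed */

/* Wrap-safe comparison: true if generation a is older than b. */
static bool before(int a, int b)
{
        return a - b < 0;
}

/* Requesting a flush stamps the request with a new generation. */
static int request_flush(void)
{
        return atomic_fetch_add(&flush_gen, 1) + 1;
}

/* Any flush a CPU performs records the generation it has satisfied. */
static void do_flush(int cpu)
{
        done_gen[cpu] = atomic_load(&flush_gen);
}

/* A pending flush can be skipped on a CPU that has already caught up. */
static bool flush_needed(int cpu, int req_gen)
{
        return before(done_gen[cpu], req_gen);
}

int main(void)
{
        int gen = request_flush();      /* e.g. stamped at migration time */

        do_flush(1);            /* cpu1 flushes for an unrelated reason */

        printf("cpu0 needs flush: %d\n", flush_needed(0, gen)); /* 1 */
        printf("cpu1 needs flush: %d\n", flush_needed(1, gen)); /* 0 */
        return 0;
}

In the patch below, flush_tlb_local() plays the role of do_flush() and
arch_migrc_adj() plays the role of flush_needed(), clearing caught-up
CPUs from the batch cpumask. The kernel code additionally needs
smp_mb() to order the generation snapshot against the flush itself,
plus READ_ONCE()/WRITE_ONCE() on the per-CPU stamp.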

Signed-off-by: Byungchul Park <byungchul@xxxxxx>
---
arch/x86/include/asm/tlbflush.h | 6 ++++
arch/x86/mm/tlb.c | 61 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
mm/migrate.c | 10 ++++++
mm/rmap.c | 2 ++
4 files changed, 79 insertions(+)

diff --git a/arch/x86/include/asm/tlbflush.h b/arch/x86/include/asm/tlbflush.h
index 752d72ea209b..da987c15049e 100644
--- a/arch/x86/include/asm/tlbflush.h
+++ b/arch/x86/include/asm/tlbflush.h
@@ -283,6 +283,12 @@ extern void arch_tlbbatch_clean(struct arch_tlbflush_unmap_batch *batch);
extern void arch_tlbbatch_fold(struct arch_tlbflush_unmap_batch *bdst,
struct arch_tlbflush_unmap_batch *bsrc);

+#ifdef CONFIG_MIGRC
+extern void arch_migrc_adj(struct arch_tlbflush_unmap_batch *batch, int gen);
+#else
+static inline void arch_migrc_adj(struct arch_tlbflush_unmap_batch *batch, int gen) {}
+#endif
+
static inline bool pte_flags_need_flush(unsigned long oldflags,
unsigned long newflags,
bool ignore_access)
diff --git a/arch/x86/mm/tlb.c b/arch/x86/mm/tlb.c
index 2dabf0f340fb..913cad013979 100644
--- a/arch/x86/mm/tlb.c
+++ b/arch/x86/mm/tlb.c
@@ -1210,9 +1210,49 @@ STATIC_NOPV void native_flush_tlb_local(void)
native_write_cr3(__native_read_cr3());
}

+#ifdef CONFIG_MIGRC
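+/* Latest migrc generation this CPU is known to have flushed up to. */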
+DEFINE_PER_CPU(int, migrc_done);
+
+static inline int migrc_tlb_local_begin(void)
+{
+ int ret = atomic_read(&migrc_gen);
+
+ /*
+ * XXX: barrier() would be sufficient if the architecture
+ * guarantees the order between memory access and TLB flush.
+ */
+ smp_mb();
+ return ret;
+}
+
+static inline void migrc_tlb_local_end(int gen)
+{
+ /*
+ * XXX: barrier() would be sufficient if the architecture
+ * guarantees the order between TLB flush and memory access.
+ */
+ smp_mb();
+ WRITE_ONCE(*this_cpu_ptr(&migrc_done), gen);
+}
+#else
+static inline int migrc_tlb_local_begin(void)
+{
+ return 0;
+}
+
+static inline void migrc_tlb_local_end(int gen)
+{
+}
+#endif
+
void flush_tlb_local(void)
{
+ int gen;
+
+ gen = migrc_tlb_local_begin();
__flush_tlb_local();
+ migrc_tlb_local_end(gen);
}

/*
@@ -1237,6 +1277,27 @@ void __flush_tlb_all(void)
}
EXPORT_SYMBOL_GPL(__flush_tlb_all);

+#ifdef CONFIG_MIGRC
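+/* Wrap-safe comparison: true if generation @a is older than @b. */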
+static inline bool before(int a, int b)
+{
+ return a - b < 0;
+}
+
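+/*
+ * Clear CPUs from @batch->cpumask that have already flushed at least
+ * as recently as @gen; the TLB flush can be skipped on them.
+ */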
+void arch_migrc_adj(struct arch_tlbflush_unmap_batch *batch, int gen)
+{
+ int cpu;
+
+ for_each_cpu(cpu, &batch->cpumask)
+ if (!before(READ_ONCE(*per_cpu_ptr(&migrc_done, cpu)), gen))
+ cpumask_clear_cpu(cpu, &batch->cpumask);
+}
+#endif
+
void arch_tlbbatch_flush(struct arch_tlbflush_unmap_batch *batch)
{
struct flush_tlb_info *info;
diff --git a/mm/migrate.c b/mm/migrate.c
index f9446f5b312a..c7b72d275b2a 100644
--- a/mm/migrate.c
+++ b/mm/migrate.c
@@ -2053,6 +2053,16 @@ static int migrate_pages_batch(struct list_head *from, new_page_t get_new_page,
stats->nr_thp_failed += thp_retry;
stats->nr_failed_pages += nr_retry_pages;
move:
+ /*
+ * This should come before try_to_unmap_flush() so that
+ * migrc_try_flush_free_folios(), called later, can benefit
+ * from the TLB flushes performed in try_to_unmap_flush().
+ *
+ * migrc_req_end() stores the timestamp for the pending
+ * request, and each TLB flush stores the timestamp at which
+ * it was performed, so that unnecessary TLB flushes can be
+ * skipped by comparing the two.
+ */
if (migrc_cond1)
migrc_req_end();

diff --git a/mm/rmap.c b/mm/rmap.c
index 0652d25206ee..2ae1b1324f84 100644
--- a/mm/rmap.c
+++ b/mm/rmap.c
@@ -627,6 +627,8 @@ static bool __migrc_try_flush_free_folios(struct llist_head *h)
llist_for_each_entry_safe(req, req2, reqs, llnode) {
struct llist_node *n;

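+ /* Skip CPUs that have flushed since this request was stamped. */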
+ arch_migrc_adj(&req->arch, req->gen);
arch_tlbbatch_fold(&arch, &req->arch);

n = llist_del_all(&req->pages);
--
2.17.1