[RFC PATCH v3 2/6] workingset: move refault distance checking into a helper

From: Kairui Song
Date: Wed Sep 20 2023 - 15:03:21 EST


From: Kairui Song <kasong@xxxxxxxxxxx>

No functional change: just move the refault distance checking logic
into a standalone helper so it can be reused later.

Signed-off-by: Kairui Song <kasong@xxxxxxxxxxx>
---
mm/workingset.c | 137 ++++++++++++++++++++++++++++--------------------
1 file changed, 79 insertions(+), 58 deletions(-)

diff --git a/mm/workingset.c b/mm/workingset.c
index 8613945fc66e..b0704cbfc667 100644
--- a/mm/workingset.c
+++ b/mm/workingset.c
@@ -170,9 +170,10 @@
*/

#define WORKINGSET_SHIFT 1
-#define EVICTION_SHIFT ((BITS_PER_LONG - BITS_PER_XA_VALUE) + \
+#define EVICTION_SHIFT ((BITS_PER_LONG - BITS_PER_XA_VALUE) + \
WORKINGSET_SHIFT + NODES_SHIFT + \
MEM_CGROUP_ID_SHIFT)
+#define EVICTION_BITS (BITS_PER_LONG - (EVICTION_SHIFT))
#define EVICTION_MASK (~0UL >> EVICTION_SHIFT)

/*
@@ -216,6 +217,79 @@ static void unpack_shadow(void *shadow, int *memcgidp, pg_data_t **pgdat,
*workingsetp = workingset;
}

+/*
+ * Sample nonresident_age at eviction time, bucketed and truncated to @bits.
+ */
+static inline unsigned long lru_eviction(struct lruvec *lruvec,
+ int bits, int bucket_order)
+{
+ unsigned long eviction = atomic_long_read(&lruvec->nonresident_age);
+
+ eviction >>= bucket_order;
+ eviction &= ~0UL >> (BITS_PER_LONG - bits);
+
+ return eviction;
+}
+
+/*
+ * Calculate and test refault distance.
+ */
+static inline bool lru_test_refault(struct mem_cgroup *memcg,
+ struct lruvec *lruvec,
+ unsigned long eviction, bool file,
+ int bits, int bucket_order)
+{
+ unsigned long refault, distance;
+ unsigned long active, inactive_file, inactive_anon;
+
+ eviction <<= bucket_order;
+ refault = atomic_long_read(&lruvec->nonresident_age);
+
+ /*
+ * The unsigned subtraction here gives an accurate distance
+ * across nonresident_age overflows in most cases. There is a
+ * special case: usually, shadow entries have a short lifetime
+ * and are either refaulted or reclaimed along with the inode
+ * before they get too old. But it is not impossible for the
+ * nonresident_age to lap a shadow entry in the field, which
+ * can then result in a false small refault distance, leading
+ * to a false activation should this old entry actually
+ * refault again. However, earlier kernels used to deactivate
+ * unconditionally with *every* reclaim invocation for the
+ * longest time, so the occasional inappropriate activation
+ * leading to pressure on the active list is not a problem.
+ */
+ distance = (refault - eviction) & (~0UL >> (BITS_PER_LONG - bits));
+
+ /*
+ * Compare the distance to the existing workingset size. We
+ * don't activate pages that couldn't stay resident even if
+ * all the memory was available to the workingset. Whether
+ * workingset competition needs to consider anon or not depends
+ * on having free swap space.
+ */
+ active = lruvec_page_state(lruvec, NR_ACTIVE_FILE);
+ inactive_file = lruvec_page_state(lruvec, NR_INACTIVE_FILE);
+
+ if (mem_cgroup_get_nr_swap_pages(memcg) > 0) {
+ active += lruvec_page_state(lruvec, NR_ACTIVE_ANON);
+ inactive_anon = lruvec_page_state(lruvec, NR_INACTIVE_ANON);
+ } else {
+ inactive_anon = 0;
+ }
+
+ /*
+ * When there are already enough active pages, be less aggressive
+ * on reactivating pages: challenging a large set of established
+ * active pages with a one-time refaulted page may not be a good idea.
+ */
+ if (active >= inactive_anon + inactive_file)
+ return distance < inactive_anon + inactive_file;
+ else
+ return distance < active + \
+ (file ? inactive_anon : inactive_file);
+}
+
#ifdef CONFIG_LRU_GEN

static void *lru_gen_eviction(struct folio *folio)
@@ -386,11 +460,10 @@ void *workingset_eviction(struct folio *folio, struct mem_cgroup *target_memcg)
lruvec = mem_cgroup_lruvec(target_memcg, pgdat);
/* XXX: target_memcg can be NULL, go through lruvec */
memcgid = mem_cgroup_id(lruvec_memcg(lruvec));
- eviction = atomic_long_read(&lruvec->nonresident_age);
- eviction >>= bucket_order;
+ eviction = lru_eviction(lruvec, EVICTION_BITS, bucket_order);
workingset_age_nonresident(lruvec, folio_nr_pages(folio));
return pack_shadow(memcgid, pgdat, eviction,
- folio_test_workingset(folio));
+ folio_test_workingset(folio));
}

/**
@@ -408,11 +481,6 @@ bool workingset_test_recent(void *shadow, bool file, bool *workingset)
{
struct mem_cgroup *eviction_memcg;
struct lruvec *eviction_lruvec;
- unsigned long refault_distance;
- unsigned long inactive_file;
- unsigned long inactive_anon;
- unsigned long refault;
- unsigned long active;
int memcgid;
struct pglist_data *pgdat;
unsigned long eviction;
@@ -421,7 +489,6 @@ bool workingset_test_recent(void *shadow, bool file, bool *workingset)
return lru_gen_test_recent(shadow, file, &eviction_lruvec, &eviction, workingset);

unpack_shadow(shadow, &memcgid, &pgdat, &eviction, workingset);
- eviction <<= bucket_order;

/*
* Look up the memcg associated with the stored ID. It might
@@ -442,56 +509,10 @@ bool workingset_test_recent(void *shadow, bool file, bool *workingset)
eviction_memcg = mem_cgroup_from_id(memcgid);
if (!mem_cgroup_disabled() && !eviction_memcg)
return false;
-
eviction_lruvec = mem_cgroup_lruvec(eviction_memcg, pgdat);
- refault = atomic_long_read(&eviction_lruvec->nonresident_age);

- /*
- * Calculate the refault distance
- *
- * The unsigned subtraction here gives an accurate distance
- * across nonresident_age overflows in most cases. There is a
- * special case: usually, shadow entries have a short lifetime
- * and are either refaulted or reclaimed along with the inode
- * before they get too old. But it is not impossible for the
- * nonresident_age to lap a shadow entry in the field, which
- * can then result in a false small refault distance, leading
- * to a false activation should this old entry actually
- * refault again. However, earlier kernels used to deactivate
- * unconditionally with *every* reclaim invocation for the
- * longest time, so the occasional inappropriate activation
- * leading to pressure on the active list is not a problem.
- */
- refault_distance = (refault - eviction) & EVICTION_MASK;
-
- /*
- * Compare the distance to the existing workingset size. We
- * don't activate pages that couldn't stay resident even if
- * all the memory was available to the workingset. Whether
- * workingset competition needs to consider anon or not depends
- * on having free swap space.
- */
- active = lruvec_page_state(eviction_lruvec, NR_ACTIVE_FILE);
- inactive_file = lruvec_page_state(eviction_lruvec, NR_INACTIVE_FILE);
-
- if (mem_cgroup_get_nr_swap_pages(eviction_memcg) > 0) {
- active += lruvec_page_state(eviction_lruvec,
- NR_ACTIVE_ANON);
- inactive_anon = lruvec_page_state(eviction_lruvec,
- NR_INACTIVE_ANON);
- } else {
- inactive_anon = 0;
- }
-
- /*
- * When there are already enough active pages, be less aggressive
- * on reactivating pages, challenge an large set of established
- * active pages with one time refaulted page may not be a good idea.
- */
- if (active >= inactive_anon + inactive_file)
- return refault_distance < inactive_anon + inactive_file;
- else
- return refault_distance < active + (file ? inactive_anon : inactive_file);
+ return lru_test_refault(eviction_memcg, eviction_lruvec, eviction,
+ file, EVICTION_BITS, bucket_order);
}

/**
--
2.41.0