Re: [PATCH 2/3] mm, lru_gen: move pages in bulk when aging

From: Yu Zhao
Date: Mon Dec 25 2023 - 01:58:48 EST


On Fri, Dec 22, 2023 at 3:24 AM Kairui Song <ryncsn@xxxxxxxxx> wrote:
>
> From: Kairui Song <kasong@xxxxxxxxxxx>
>
> Another source of overhead in aging is page moving. In most cases,
> pages are moved to the same gen after folio_inc_gen is called,
> especially the protected pages, so it's better to move them in bulk.
>
> This also has a good effect on LRU ordering. Currently, when MGLRU
> ages, it walks the LRU backward and moves the protected pages to the
> tail of the newer gen one by one, which reverses their order in the
> LRU. Moving them in batches helps preserve their order, though only
> within a small scope due to the MAX_LRU_BATCH scan limit.
>
> After this commit, we can see a performance gain:
>
> Tested in a 4G memcg on an EPYC 7K62 with:
>
> memcached -u nobody -m 16384 -s /tmp/memcached.socket \
> -a 0766 -t 16 -B binary &
>
> memtier_benchmark -S /tmp/memcached.socket \
> -P memcache_binary -n allkeys \
> --key-minimum=1 --key-maximum=16000000 -d 1024 \
> --ratio=1:0 --key-pattern=P:P -c 2 -t 16 --pipeline 8 -x 6
>
> Average result of 18 test runs:
>
> Before: 44017.78 Ops/sec
> After patch 1-2: 44810.01 Ops/sec (+1.8%)

Was it tested with CONFIG_DEBUG_LIST=y?

Also, the (44810.01-44687.08)/44687.08=0.0027 improvement sounds like
noise to me.
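
For reference, the batching in the patch below boils down to two pointers
and one splice: while walking a generation's list tail-to-head, remember
the first and last folio of the current run that is bound for the same
target generation, then move the whole run with a single
list_bulk_move_tail() instead of one list_move_tail() per folio. A rough
sketch of just that pattern, detached from the MGLRU structures; the run_*
names are made up, only list_bulk_move_tail() is the real helper from
include/linux/list.h:

#include <linux/list.h>

struct run {
        /* sub-list [first, last], still linked into the source list */
        struct list_head *first, *last;
};

/*
 * Record one more entry bound for the same target list.  The walk goes
 * tail-to-head, so the entry seen first becomes the run's last element
 * and every later entry becomes its new first.
 */
static void run_add(struct run *run, struct list_head *entry)
{
        if (!run->last)
                run->last = entry;
        run->first = entry;
}

/* Splice the whole pending run to the tail of @target in one operation. */
static void run_flush(struct run *run, struct list_head *target)
{
        if (!run->first)
                return;
        list_bulk_move_tail(target, run->first, run->last);
        run->first = run->last = NULL;
}

Splicing the run as a unit also keeps the folios in their original
relative order, whereas moving them one by one from a backward walk
appends them to the target in reverse.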

> Signed-off-by: Kairui Song <kasong@xxxxxxxxxxx>
> ---
> mm/vmscan.c | 84 ++++++++++++++++++++++++++++++++++++++++++++---------
> 1 file changed, 71 insertions(+), 13 deletions(-)
>
> diff --git a/mm/vmscan.c b/mm/vmscan.c
> index e3b4797b9729..af1266129c1b 100644
> --- a/mm/vmscan.c
> +++ b/mm/vmscan.c
> @@ -3102,9 +3102,46 @@ static int folio_update_gen(struct folio *folio, int gen)
> */
> struct gen_update_batch {
> int delta[MAX_NR_GENS];
> + struct folio *head, *tail;
> };
>
> -static void lru_gen_update_batch(struct lruvec *lruvec, bool type, int zone,
> +static void inline lru_gen_inc_bulk_finish(struct lru_gen_folio *lrugen,
> + int bulk_gen, bool type, int zone,
> + struct gen_update_batch *batch)
> +{
> + if (!batch->head)
> + return;
> +
> + list_bulk_move_tail(&lrugen->folios[bulk_gen][type][zone],
> + &batch->head->lru,
> + &batch->tail->lru);
> +
> + batch->head = NULL;
> +}
> +
> +/*
> + * When aging, protected pages will go to the tail of the same higher
> + * gen, so they can be moved in batches. Besides reduced overhead, this
> + * also avoids changing their LRU order in a small scope.
> + */
> +static void inline lru_gen_try_inc_bulk(struct lru_gen_folio *lrugen, struct folio *folio,
> + int bulk_gen, int gen, bool type, int zone,
> + struct gen_update_batch *batch)
> +{
> + /*
> + * If the folio is not moving to the bulk_gen, it raced with promotion,
> + * so it needs to go to the head of another LRU.
> + */
> + if (bulk_gen != gen)
> + list_move(&folio->lru, &lrugen->folios[gen][type][zone]);
> +
> + if (!batch->head)
> + batch->tail = folio;
> +
> + batch->head = folio;
> +}
> +
> +static void lru_gen_update_batch(struct lruvec *lruvec, int bulk_gen, bool type, int zone,
> struct gen_update_batch *batch)
> {
> int gen;
> @@ -3112,6 +3149,8 @@ static void lru_gen_update_batch(struct lruvec *lruvec, bool type, int zone,
> struct lru_gen_folio *lrugen = &lruvec->lrugen;
> enum lru_list lru = type ? LRU_INACTIVE_FILE : LRU_INACTIVE_ANON;
>
> + lru_gen_inc_bulk_finish(lrugen, bulk_gen, type, zone, batch);
> +
> for (gen = 0; gen < MAX_NR_GENS; gen++) {
> int delta = batch->delta[gen];
>
> @@ -3705,6 +3744,7 @@ static bool inc_min_seq(struct lruvec *lruvec, int type, bool can_swap)
> struct gen_update_batch batch = { };
> struct lru_gen_folio *lrugen = &lruvec->lrugen;
> int new_gen, old_gen = lru_gen_from_seq(lrugen->min_seq[type]);
> + int bulk_gen = (old_gen + 1) % MAX_NR_GENS;
>
> if (type == LRU_GEN_ANON && !can_swap)
> goto done;
> @@ -3712,24 +3752,33 @@ static bool inc_min_seq(struct lruvec *lruvec, int type, bool can_swap)
> /* prevent cold/hot inversion if force_scan is true */
> for (zone = 0; zone < MAX_NR_ZONES; zone++) {
> struct list_head *head = &lrugen->folios[old_gen][type][zone];
> + struct folio *prev = NULL;
>
> - while (!list_empty(head)) {
> - struct folio *folio = lru_to_folio(head);
> + if (!list_empty(head))
> + prev = lru_to_folio(head);
>
> + while (prev) {
> + struct folio *folio = prev;
> VM_WARN_ON_ONCE_FOLIO(folio_test_unevictable(folio), folio);
> VM_WARN_ON_ONCE_FOLIO(folio_test_active(folio), folio);
> VM_WARN_ON_ONCE_FOLIO(folio_is_file_lru(folio) != type, folio);
> VM_WARN_ON_ONCE_FOLIO(folio_zonenum(folio) != zone, folio);
>
> + if (unlikely(list_is_first(&folio->lru, head)))
> + prev = NULL;
> + else
> + prev = lru_to_folio(&folio->lru);
> +
> new_gen = folio_inc_gen(lruvec, folio, false, &batch);
> - list_move_tail(&folio->lru, &lrugen->folios[new_gen][type][zone]);
> + lru_gen_try_inc_bulk(lrugen, folio, bulk_gen, new_gen, type, zone, &batch);
>
> if (!--remaining) {
> - lru_gen_update_batch(lruvec, type, zone, &batch);
> + lru_gen_update_batch(lruvec, bulk_gen, type, zone, &batch);
> return false;
> }
> }
> - lru_gen_update_batch(lruvec, type, zone, &batch);
> +
> + lru_gen_update_batch(lruvec, bulk_gen, type, zone, &batch);
> }
> done:
> reset_ctrl_pos(lruvec, type, true);
> @@ -4240,7 +4289,7 @@ static int lru_gen_memcg_seg(struct lruvec *lruvec)
> ******************************************************************************/
>
> static bool sort_folio(struct lruvec *lruvec, struct folio *folio, struct scan_control *sc,
> - int tier_idx, struct gen_update_batch *batch)
> + int tier_idx, int bulk_gen, struct gen_update_batch *batch)
> {
> bool success;
> int gen = folio_lru_gen(folio);
> @@ -4283,7 +4332,7 @@ static bool sort_folio(struct lruvec *lruvec, struct folio *folio, struct scan_c
> int hist = lru_hist_from_seq(lrugen->min_seq[type]);
>
> gen = folio_inc_gen(lruvec, folio, false, batch);
> - list_move_tail(&folio->lru, &lrugen->folios[gen][type][zone]);
> + lru_gen_try_inc_bulk(lrugen, folio, bulk_gen, gen, type, zone, batch);
>
> WRITE_ONCE(lrugen->protected[hist][type][tier - 1],
> lrugen->protected[hist][type][tier - 1] + delta);
> @@ -4293,7 +4342,7 @@ static bool sort_folio(struct lruvec *lruvec, struct folio *folio, struct scan_c
> /* ineligible */
> if (zone > sc->reclaim_idx || skip_cma(folio, sc)) {
> gen = folio_inc_gen(lruvec, folio, false, batch);
> - list_move_tail(&folio->lru, &lrugen->folios[gen][type][zone]);
> + lru_gen_try_inc_bulk(lrugen, folio, bulk_gen, gen, type, zone, batch);
> return true;
> }
>
> @@ -4367,11 +4416,16 @@ static int scan_folios(struct lruvec *lruvec, struct scan_control *sc,
> LIST_HEAD(moved);
> int skipped_zone = 0;
> struct gen_update_batch batch = { };
> + int bulk_gen = (gen + 1) % MAX_NR_GENS;
> int zone = (sc->reclaim_idx + i) % MAX_NR_ZONES;
> struct list_head *head = &lrugen->folios[gen][type][zone];
> + struct folio *prev = NULL;
>
> - while (!list_empty(head)) {
> - struct folio *folio = lru_to_folio(head);
> + if (!list_empty(head))
> + prev = lru_to_folio(head);
> +
> + while (prev) {
> + struct folio *folio = prev;
> int delta = folio_nr_pages(folio);
>
> VM_WARN_ON_ONCE_FOLIO(folio_test_unevictable(folio), folio);
> @@ -4380,8 +4434,12 @@ static int scan_folios(struct lruvec *lruvec, struct scan_control *sc,
> VM_WARN_ON_ONCE_FOLIO(folio_zonenum(folio) != zone, folio);
>
> scanned += delta;
> + if (unlikely(list_is_first(&folio->lru, head)))
> + prev = NULL;
> + else
> + prev = lru_to_folio(&folio->lru);
>
> - if (sort_folio(lruvec, folio, sc, tier, &batch))
> + if (sort_folio(lruvec, folio, sc, tier, bulk_gen, &batch))
> sorted += delta;
> else if (isolate_folio(lruvec, folio, sc)) {
> list_add(&folio->lru, list);
> @@ -4401,7 +4459,7 @@ static int scan_folios(struct lruvec *lruvec, struct scan_control *sc,
> skipped += skipped_zone;
> }
>
> - lru_gen_update_batch(lruvec, type, zone, &batch);
> + lru_gen_update_batch(lruvec, bulk_gen, type, zone, &batch);
>
> if (!remaining || isolated >= MIN_LRU_BATCH)
> break;
> --
> 2.43.0
>
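
The open-coded walks in inc_min_seq() and scan_folios() above, which fetch
the previous folio before the current one can be moved off the list, follow
the same idea as the stock list_for_each_entry_safe_reverse() helper from
include/linux/list.h: stash the next entry to visit before the loop body may
unlink the current one. A compressed sketch of the inc_min_seq() loop written
with that helper, untested and meant only to slot into the patch's own
context (it drops the remaining/early-return handling of the real code):

        struct folio *folio, *prev;

        /*
         * Walk old_gen's list tail-to-head; "prev" is saved up front, so
         * the body is free to move "folio" to another list.
         */
        list_for_each_entry_safe_reverse(folio, prev, head, lru) {
                new_gen = folio_inc_gen(lruvec, folio, false, &batch);
                lru_gen_try_inc_bulk(lrugen, folio, bulk_gen, new_gen,
                                     type, zone, &batch);
        }
        /* splice whatever run is still pending into bulk_gen */
        lru_gen_inc_bulk_finish(lrugen, bulk_gen, type, zone, &batch);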