Re: [PATCH] mm: migrate: record the mlocked page status to remove unnecessary lru drain
From: Zi Yan
Date: Thu Oct 19 2023 - 09:23:14 EST
On 19 Oct 2023, at 2:09, Huang, Ying wrote:
> Zi Yan <ziy@xxxxxxxxxx> writes:
>
>> On 18 Oct 2023, at 9:04, Baolin Wang wrote:
>>
>>> When doing compaction, I found the lru_add_drain() is an obvious hotspot
>>> when migrating pages. The distribution of this hotspot is as follows:
>>> - 18.75% compact_zone
>>> - 17.39% migrate_pages
>>> - 13.79% migrate_pages_batch
>>> - 11.66% migrate_folio_move
>>> - 7.02% lru_add_drain
>>> + 7.02% lru_add_drain_cpu
>>> + 3.00% move_to_new_folio
>>> 1.23% rmap_walk
>>> + 1.92% migrate_folio_unmap
>>> + 3.20% migrate_pages_sync
>>> + 0.90% isolate_migratepages
>>>
>>> The lru_add_drain() was added by commit c3096e6782b7 ("mm/migrate:
>>> __unmap_and_move() push good newpage to LRU") to drain the newpage to LRU
>>> immediately, to help to build up the correct newpage->mlock_count in
>>> remove_migration_ptes() for mlocked pages. However, if there are no mlocked
>>> pages are migrating, then we can avoid this lru drain operation, especailly
>>> for the heavy concurrent scenarios.
>>
>> lru_add_drain() is also used to drain pages out of folio_batch. Pages in folio_batch
>> have an additional pin to prevent migration. See folio_get(folio); in folio_add_lru().
>
> lru_add_drain() is called after the page reference count checking in
> move_to_new_folio(). So, I don't this is an issue.
You are right. I missed that. Thanks for pointing this out.
>
>>>
>>> So we can record the source pages' mlocked status in migrate_folio_unmap(),
>>> and only drain the lru list when the mlocked status is set in migrate_folio_move().
>>> In addition, the page was already isolated from lru when migrating, so we
>>> check the mlocked status is stable by folio_test_mlocked() in migrate_folio_unmap().
>>>
>>> After this patch, I can see the hotpot of the lru_add_drain() is gone:
>>> - 9.41% migrate_pages_batch
>>> - 6.15% migrate_folio_move
>>> - 3.64% move_to_new_folio
>>> + 1.80% migrate_folio_extra
>>> + 1.70% buffer_migrate_folio
>>> + 1.41% rmap_walk
>>> + 0.62% folio_add_lru
>>> + 3.07% migrate_folio_unmap
>>>
>>> Meanwhile, the compaction latency shows some improvements when running
>>> thpscale:
>>> base patched
>>> Amean fault-both-1 1131.22 ( 0.00%) 1112.55 * 1.65%*
>>> Amean fault-both-3 2489.75 ( 0.00%) 2324.15 * 6.65%*
>>> Amean fault-both-5 3257.37 ( 0.00%) 3183.18 * 2.28%*
>>> Amean fault-both-7 4257.99 ( 0.00%) 4079.04 * 4.20%*
>>> Amean fault-both-12 6614.02 ( 0.00%) 6075.60 * 8.14%*
>>> Amean fault-both-18 10607.78 ( 0.00%) 8978.86 * 15.36%*
>>> Amean fault-both-24 14911.65 ( 0.00%) 11619.55 * 22.08%*
>>> Amean fault-both-30 14954.67 ( 0.00%) 14925.66 * 0.19%*
>>> Amean fault-both-32 16654.87 ( 0.00%) 15580.31 * 6.45%*
>>>
>>> Signed-off-by: Baolin Wang <baolin.wang@xxxxxxxxxxxxxxxxx>
>>> ---
>>> mm/migrate.c | 50 ++++++++++++++++++++++++++++++++++++++------------
>>> 1 file changed, 38 insertions(+), 12 deletions(-)
>>>
>>> diff --git a/mm/migrate.c b/mm/migrate.c
>>> index 4caf405b6504..32c96f89710f 100644
>>> --- a/mm/migrate.c
>>> +++ b/mm/migrate.c
>>> @@ -1027,22 +1027,32 @@ union migration_ptr {
>>> struct anon_vma *anon_vma;
>>> struct address_space *mapping;
>>> };
>>> +
>>> +enum {
>>> + PAGE_WAS_MAPPED = 1 << 0,
>>> + PAGE_WAS_MLOCKED = 1 << 1,
>>> +};
>>> +
>>> static void __migrate_folio_record(struct folio *dst,
>>> - unsigned long page_was_mapped,
>>> + unsigned long page_flags,
>>> struct anon_vma *anon_vma)
>>> {
>>> union migration_ptr ptr = { .anon_vma = anon_vma };
>>> dst->mapping = ptr.mapping;
>>> - dst->private = (void *)page_was_mapped;
>>> + dst->private = (void *)page_flags;
>>> }
>>>
>>> static void __migrate_folio_extract(struct folio *dst,
>>> int *page_was_mappedp,
>>> + int *page_was_mlocked,
>>> struct anon_vma **anon_vmap)
>>> {
>>> union migration_ptr ptr = { .mapping = dst->mapping };
>>> + unsigned long page_flags = (unsigned long)dst->private;
>>> +
>>> *anon_vmap = ptr.anon_vma;
>>> - *page_was_mappedp = (unsigned long)dst->private;
>>> + *page_was_mappedp = page_flags & PAGE_WAS_MAPPED ? 1 : 0;
>>> + *page_was_mlocked = page_flags & PAGE_WAS_MLOCKED ? 1 : 0;
>>> dst->mapping = NULL;
>>> dst->private = NULL;
>>> }
>>> @@ -1103,7 +1113,7 @@ static int migrate_folio_unmap(new_folio_t get_new_folio,
>>> {
>>> struct folio *dst;
>>> int rc = -EAGAIN;
>>> - int page_was_mapped = 0;
>>> + int page_was_mapped = 0, page_was_mlocked = 0;
>>> struct anon_vma *anon_vma = NULL;
>>> bool is_lru = !__folio_test_movable(src);
>>> bool locked = false;
>>> @@ -1157,6 +1167,7 @@ static int migrate_folio_unmap(new_folio_t get_new_folio,
>>> folio_lock(src);
>>> }
>>> locked = true;
>>> + page_was_mlocked = folio_test_mlocked(src);
>>>
>>> if (folio_test_writeback(src)) {
>>> /*
>>> @@ -1206,7 +1217,7 @@ static int migrate_folio_unmap(new_folio_t get_new_folio,
>>> dst_locked = true;
>>>
>>> if (unlikely(!is_lru)) {
>>> - __migrate_folio_record(dst, page_was_mapped, anon_vma);
>>> + __migrate_folio_record(dst, 0, anon_vma);
>>> return MIGRATEPAGE_UNMAP;
>>> }
>>>
>>> @@ -1236,7 +1247,13 @@ static int migrate_folio_unmap(new_folio_t get_new_folio,
>>> }
>>>
>>> if (!folio_mapped(src)) {
>>> - __migrate_folio_record(dst, page_was_mapped, anon_vma);
>>> + unsigned int page_flags = 0;
>>> +
>>> + if (page_was_mapped)
>>> + page_flags |= PAGE_WAS_MAPPED;
>>> + if (page_was_mlocked)
>>> + page_flags |= PAGE_WAS_MLOCKED;
>>> + __migrate_folio_record(dst, page_flags, anon_vma);
>>> return MIGRATEPAGE_UNMAP;
>>> }
>>>
>>> @@ -1261,12 +1278,13 @@ static int migrate_folio_move(free_folio_t put_new_folio, unsigned long private,
>>> struct list_head *ret)
>>> {
>>> int rc;
>>> - int page_was_mapped = 0;
>>> + int page_was_mapped = 0, page_was_mlocked = 0;
>>> struct anon_vma *anon_vma = NULL;
>>> bool is_lru = !__folio_test_movable(src);
>>> struct list_head *prev;
>>>
>>> - __migrate_folio_extract(dst, &page_was_mapped, &anon_vma);
>>> + __migrate_folio_extract(dst, &page_was_mapped,
>>> + &page_was_mlocked, &anon_vma);
>>
>> It is better to read out the flag, then check page_was_mapped and page_was_mlocked
>> to avoid future __migrate_folio_extract() interface churns.
>
> IHMO, in contrast, it's better to use separate flags in
> __migrate_folio_record() too to avoid to pack flags in each call site.
I am OK with it as long as the parameters of these two are symmetric.
>
>>> prev = dst->lru.prev;
>>> list_del(&dst->lru);
>>>
>>> @@ -1287,7 +1305,7 @@ static int migrate_folio_move(free_folio_t put_new_folio, unsigned long private,
>>> * isolated from the unevictable LRU: but this case is the easiest.
>>> */
>>> folio_add_lru(dst);
>>> - if (page_was_mapped)
>>> + if (page_was_mlocked)
>>> lru_add_drain();
>>
>> Like I said at the top, this would be if (page_was_mapped || page_was_mlocked).
>>
>>>
>>> if (page_was_mapped)
>>> @@ -1321,8 +1339,15 @@ static int migrate_folio_move(free_folio_t put_new_folio, unsigned long private,
>>> * right list unless we want to retry.
>>> */
>>> if (rc == -EAGAIN) {
>>> + unsigned int page_flags = 0;
>>> +
>>> + if (page_was_mapped)
>>> + page_flags |= PAGE_WAS_MAPPED;
>>> + if (page_was_mlocked)
>>> + page_flags |= PAGE_WAS_MLOCKED;
>>> +
>>> list_add(&dst->lru, prev);
>>> - __migrate_folio_record(dst, page_was_mapped, anon_vma);
>>> + __migrate_folio_record(dst, page_flags, anon_vma);
>>> return rc;
>>> }
>>>
>>> @@ -1799,10 +1824,11 @@ static int migrate_pages_batch(struct list_head *from,
>>> dst = list_first_entry(&dst_folios, struct folio, lru);
>>> dst2 = list_next_entry(dst, lru);
>>> list_for_each_entry_safe(folio, folio2, &unmap_folios, lru) {
>>> - int page_was_mapped = 0;
>>> + int page_was_mapped = 0, page_was_mlocked = 0;
>>> struct anon_vma *anon_vma = NULL;
>>>
>>> - __migrate_folio_extract(dst, &page_was_mapped, &anon_vma);
>>> + __migrate_folio_extract(dst, &page_was_mapped,
>>> + &page_was_mlocked, &anon_vma);
>>> migrate_folio_undo_src(folio, page_was_mapped, anon_vma,
>>> true, ret_folios);
>>> list_del(&dst->lru);
>>> --
>>> 2.39.3
>
> --
> Best Regards,
> Huang, Ying
--
Best Regards,
Yan, Zi
Attachment:
signature.asc
Description: OpenPGP digital signature