Re: [PATCH 1/8] migrate_pages: organize stats with struct migrate_pages_stats

From: Alistair Popple
Date: Thu Jan 05 2023 - 01:53:59 EST



"Huang, Ying" <ying.huang@xxxxxxxxx> writes:

> Alistair Popple <apopple@xxxxxxxxxx> writes:
>
>> Huang Ying <ying.huang@xxxxxxxxx> writes:
>>
>>> Define struct migrate_pages_stats to organize the various statistics
>>> in migrate_pages(). This makes it easier to collect and consume the
>>> statistics in multiple functions. This will be needed in the
>>> following patches in the series.
>>>
>>> Signed-off-by: "Huang, Ying" <ying.huang@xxxxxxxxx>
>>> Cc: Zi Yan <ziy@xxxxxxxxxx>
>>> Cc: Yang Shi <shy828301@xxxxxxxxx>
>>> Cc: Baolin Wang <baolin.wang@xxxxxxxxxxxxxxxxx>
>>> Cc: Oscar Salvador <osalvador@xxxxxxx>
>>> Cc: Matthew Wilcox <willy@xxxxxxxxxxxxx>
>>> Cc: Bharata B Rao <bharata@xxxxxxx>
>>> Cc: Alistair Popple <apopple@xxxxxxxxxx>
>>> Cc: haoxin <xhao@xxxxxxxxxxxxxxxxx>
>>> ---
>>> mm/migrate.c | 58 +++++++++++++++++++++++++++++-----------------------
>>> 1 file changed, 32 insertions(+), 26 deletions(-)
>>>
>>> diff --git a/mm/migrate.c b/mm/migrate.c
>>> index a4d3fc65085f..ec9263a33d38 100644
>>> --- a/mm/migrate.c
>>> +++ b/mm/migrate.c
>>> @@ -1396,6 +1396,14 @@ static inline int try_split_folio(struct folio *folio, struct list_head *split_f
>>> return rc;
>>> }
>>>
>>> +struct migrate_pages_stats {
>>> + int nr_succeeded;
>>> + int nr_failed_pages;
>>> + int nr_thp_succeeded;
>>> + int nr_thp_failed;
>>> + int nr_thp_split;
>>
>> I think some brief comments in the code for what each stat is tracking
>> and their relationship to each other would be helpful (e.g. does
>> nr_succeeded include thp subpages, etc.). Or at least a reference to
>> where this is documented (i.e. page_migration.rst), as I recall there has
>> been some confusion in the past that has led to bugs.
>
> OK, will do that in the next version.

You should add that nr_failed_pages doesn't count failures of migrations
that weren't attempted because of, e.g., an allocation failure, as that
was a surprising detail to me at least. Unless, of course, you decide to
fix that :-)

>> Otherwise the patch looks good so:
>>
>> Reviewed-by: Alistair Popple <apopple@xxxxxxxxxx>
>
> Thanks!
>
> Best Regards,
> Huang, Ying
>
>>> +};
>>> +
>>> /*
>>> * migrate_pages - migrate the folios specified in a list, to the free folios
>>> * supplied as the target for the page migration
>>> @@ -1430,13 +1438,8 @@ int migrate_pages(struct list_head *from, new_page_t get_new_page,
>>> int large_retry = 1;
>>> int thp_retry = 1;
>>> int nr_failed = 0;
>>> - int nr_failed_pages = 0;
>>> int nr_retry_pages = 0;
>>> - int nr_succeeded = 0;
>>> - int nr_thp_succeeded = 0;
>>> int nr_large_failed = 0;
>>> - int nr_thp_failed = 0;
>>> - int nr_thp_split = 0;
>>> int pass = 0;
>>> bool is_large = false;
>>> bool is_thp = false;
>>> @@ -1446,9 +1449,11 @@ int migrate_pages(struct list_head *from, new_page_t get_new_page,
>>> LIST_HEAD(split_folios);
>>> bool nosplit = (reason == MR_NUMA_MISPLACED);
>>> bool no_split_folio_counting = false;
>>> + struct migrate_pages_stats stats;
>>>
>>> trace_mm_migrate_pages_start(mode, reason);
>>>
>>> + memset(&stats, 0, sizeof(stats));
>>> split_folio_migration:
>>> for (pass = 0; pass < 10 && (retry || large_retry); pass++) {
>>> retry = 0;
>>> @@ -1502,9 +1507,9 @@ int migrate_pages(struct list_head *from, new_page_t get_new_page,
>>> /* Large folio migration is unsupported */
>>> if (is_large) {
>>> nr_large_failed++;
>>> - nr_thp_failed += is_thp;
>>> + stats.nr_thp_failed += is_thp;
>>> if (!try_split_folio(folio, &split_folios)) {
>>> - nr_thp_split += is_thp;
>>> + stats.nr_thp_split += is_thp;
>>> break;
>>> }
>>> /* Hugetlb migration is unsupported */
>>> @@ -1512,7 +1517,7 @@ int migrate_pages(struct list_head *from, new_page_t get_new_page,
>>> nr_failed++;
>>> }
>>>
>>> - nr_failed_pages += nr_pages;
>>> + stats.nr_failed_pages += nr_pages;
>>> list_move_tail(&folio->lru, &ret_folios);
>>> break;
>>> case -ENOMEM:
>>> @@ -1522,13 +1527,13 @@ int migrate_pages(struct list_head *from, new_page_t get_new_page,
>>> */
>>> if (is_large) {
>>> nr_large_failed++;
>>> - nr_thp_failed += is_thp;
>>> + stats.nr_thp_failed += is_thp;
>>> /* Large folio NUMA faulting doesn't split to retry. */
>>> if (!nosplit) {
>>> int ret = try_split_folio(folio, &split_folios);
>>>
>>> if (!ret) {
>>> - nr_thp_split += is_thp;
>>> + stats.nr_thp_split += is_thp;
>>> break;
>>> } else if (reason == MR_LONGTERM_PIN &&
>>> ret == -EAGAIN) {
>>> @@ -1546,7 +1551,7 @@ int migrate_pages(struct list_head *from, new_page_t get_new_page,
>>> nr_failed++;
>>> }
>>>
>>> - nr_failed_pages += nr_pages + nr_retry_pages;
>>> + stats.nr_failed_pages += nr_pages + nr_retry_pages;
>>> /*
>>> * There might be some split folios of fail-to-migrate large
>>> * folios left in split_folios list. Move them back to migration
>>> @@ -1556,7 +1561,7 @@ int migrate_pages(struct list_head *from, new_page_t get_new_page,
>>> list_splice_init(&split_folios, from);
>>> /* nr_failed isn't updated for not used */
>>> nr_large_failed += large_retry;
>>> - nr_thp_failed += thp_retry;
>>> + stats.nr_thp_failed += thp_retry;
>>> goto out;
>>> case -EAGAIN:
>>> if (is_large) {
>>> @@ -1568,8 +1573,8 @@ int migrate_pages(struct list_head *from, new_page_t get_new_page,
>>> nr_retry_pages += nr_pages;
>>> break;
>>> case MIGRATEPAGE_SUCCESS:
>>> - nr_succeeded += nr_pages;
>>> - nr_thp_succeeded += is_thp;
>>> + stats.nr_succeeded += nr_pages;
>>> + stats.nr_thp_succeeded += is_thp;
>>> break;
>>> default:
>>> /*
>>> @@ -1580,20 +1585,20 @@ int migrate_pages(struct list_head *from, new_page_t get_new_page,
>>> */
>>> if (is_large) {
>>> nr_large_failed++;
>>> - nr_thp_failed += is_thp;
>>> + stats.nr_thp_failed += is_thp;
>>> } else if (!no_split_folio_counting) {
>>> nr_failed++;
>>> }
>>>
>>> - nr_failed_pages += nr_pages;
>>> + stats.nr_failed_pages += nr_pages;
>>> break;
>>> }
>>> }
>>> }
>>> nr_failed += retry;
>>> nr_large_failed += large_retry;
>>> - nr_thp_failed += thp_retry;
>>> - nr_failed_pages += nr_retry_pages;
>>> + stats.nr_thp_failed += thp_retry;
>>> + stats.nr_failed_pages += nr_retry_pages;
>>> /*
>>> * Try to migrate split folios of fail-to-migrate large folios, no
>>> * nr_failed counting in this round, since all split folios of a
>>> @@ -1626,16 +1631,17 @@ int migrate_pages(struct list_head *from, new_page_t get_new_page,
>>> if (list_empty(from))
>>> rc = 0;
>>>
>>> - count_vm_events(PGMIGRATE_SUCCESS, nr_succeeded);
>>> - count_vm_events(PGMIGRATE_FAIL, nr_failed_pages);
>>> - count_vm_events(THP_MIGRATION_SUCCESS, nr_thp_succeeded);
>>> - count_vm_events(THP_MIGRATION_FAIL, nr_thp_failed);
>>> - count_vm_events(THP_MIGRATION_SPLIT, nr_thp_split);
>>> - trace_mm_migrate_pages(nr_succeeded, nr_failed_pages, nr_thp_succeeded,
>>> - nr_thp_failed, nr_thp_split, mode, reason);
>>> + count_vm_events(PGMIGRATE_SUCCESS, stats.nr_succeeded);
>>> + count_vm_events(PGMIGRATE_FAIL, stats.nr_failed_pages);
>>> + count_vm_events(THP_MIGRATION_SUCCESS, stats.nr_thp_succeeded);
>>> + count_vm_events(THP_MIGRATION_FAIL, stats.nr_thp_failed);
>>> + count_vm_events(THP_MIGRATION_SPLIT, stats.nr_thp_split);
>>> + trace_mm_migrate_pages(stats.nr_succeeded, stats.nr_failed_pages,
>>> + stats.nr_thp_succeeded, stats.nr_thp_failed,
>>> + stats.nr_thp_split, mode, reason);
>>>
>>> if (ret_succeeded)
>>> - *ret_succeeded = nr_succeeded;
>>> + *ret_succeeded = stats.nr_succeeded;
>>>
>>> return rc;
>>> }