Re: [PATCH] mm/gup.c: Simplify and fix check_and_migrate_movable_pages() return codes

From: Jason Gunthorpe
Date: Fri Jul 29 2022 - 15:44:32 EST


On Fri, Jul 29, 2022 at 12:46:45PM +1000, Alistair Popple wrote:
> When pinning pages with FOLL_LONGTERM check_and_migrate_movable_pages()
> is called to migrate pages out of zones which should not contain any
> longterm pinned pages.
>
> When migration succeeds all pages will have been unpinned so pinning
> needs to be retried. This is indicated by returning zero. When all pages
> are in the correct zone the number of pinned pages is returned.
>
> However migration can also fail, in which case pages are unpinned and
> -ENOMEM is returned. However if the failure was due to not being able
> to isolate a page, zero is returned. This leads to indefinite looping in
> __gup_longterm_locked().
>
> Fix this by simplifying the return codes such that zero indicates all
> pages were successfully pinned in the correct zone while errors indicate
> either pages were migrated and pinning should be retried or that
> migration has failed and therefore the pinning operation should fail.
>
> Signed-off-by: Alistair Popple <apopple@xxxxxxxxxx>
> ---
> mm/gup.c | 46 +++++++++++++++++++++++-----------------------
> 1 file changed, 23 insertions(+), 23 deletions(-)

I have to say I prefer the usual style where all the error exit paths
do 'goto error' instead of trying to keep track of the error in 'ret'.

AFAICT there is no reason to 'continue' in most of these paths since
we intend to return to userspace with an error anyhow? Why try to
isolate more pages?

> @@ -1980,19 +1980,18 @@ static long check_and_migrate_movable_pages(unsigned long nr_pages,
> folio_nr_pages(folio));
> }
>
> - if (!list_empty(&movable_page_list) || isolation_error_count
> - || coherent_pages)
> - goto unpin_pages;
> -
> /*
> * If list is empty, and no isolation errors, means that all pages are
> - * in the correct zone.
> + * in the correct zone. If there were device coherent pages some pages
> + * have been unpinned.
> */

That comment is a bit confusing. I guess it is trying to explain what
coherent_pages is doing?

Maybe just:

All the given pages are fine, nothing was done

> + if (list_empty(&movable_page_list) && !ret && !coherent_pages)
> + return 0;
>
> -unpin_pages:

Now that this label is removed, the 'if' that followed it

if (!list_empty(&movable_page_list)) {

is also now unneeded because the above 'return 0' already checked it

I came up with this on top:

diff --git a/mm/gup.c b/mm/gup.c
index 9e7c76d1e4ee3c..eddcf3c0eba727 100644
--- a/mm/gup.c
+++ b/mm/gup.c
@@ -1912,11 +1912,15 @@ static long check_and_migrate_movable_pages(unsigned long nr_pages,
struct page **pages,
unsigned int gup_flags)
{
+ struct migration_target_control mtc = {
+ .nid = NUMA_NO_NODE,
+ .gfp_mask = GFP_USER | __GFP_NOWARN,
+ };
unsigned long i;
struct folio *prev_folio = NULL;
LIST_HEAD(movable_page_list);
bool drain_allow = true, coherent_pages = false;
- int ret = 0;
+ int ret = -EBUSY;

for (i = 0; i < nr_pages; i++) {
struct folio *folio = page_folio(pages[i]);
@@ -1948,10 +1952,8 @@ static long check_and_migrate_movable_pages(unsigned long nr_pages,
unpin_user_page(&folio->page);
}

- if (migrate_device_coherent_page(&folio->page)) {
- ret = -EBUSY;
- break;
- }
+ if (migrate_device_coherent_page(&folio->page))
+ goto error;
continue;
}

@@ -1963,7 +1965,7 @@ static long check_and_migrate_movable_pages(unsigned long nr_pages,
if (folio_test_hugetlb(folio)) {
if (isolate_hugetlb(&folio->page,
&movable_page_list))
- ret = -EBUSY;
+ goto error;
continue;
}

@@ -1972,10 +1974,8 @@ static long check_and_migrate_movable_pages(unsigned long nr_pages,
drain_allow = false;
}

- if (folio_isolate_lru(folio)) {
- ret = -EBUSY;
- continue;
- }
+ if (folio_isolate_lru(folio))
+ goto error;
list_add_tail(&folio->lru, &movable_page_list);
node_stat_mod_folio(folio,
NR_ISOLATED_ANON + folio_is_file_lru(folio),
@@ -1987,7 +1987,7 @@ static long check_and_migrate_movable_pages(unsigned long nr_pages,
* in the correct zone. If there were device coherent pages some pages
* have been unpinned.
*/
- if (list_empty(&movable_page_list) && !ret && !coherent_pages)
+ if (list_empty(&movable_page_list) && !coherent_pages)
return 0;

/*
@@ -2005,23 +2005,19 @@ static long check_and_migrate_movable_pages(unsigned long nr_pages,
put_page(pages[i]);
}

- if (!list_empty(&movable_page_list)) {
- struct migration_target_control mtc = {
- .nid = NUMA_NO_NODE,
- .gfp_mask = GFP_USER | __GFP_NOWARN,
- };
-
- ret = migrate_pages(&movable_page_list, alloc_migration_target,
- NULL, (unsigned long)&mtc, MIGRATE_SYNC,
- MR_LONGTERM_PIN, NULL);
- if (ret > 0) /* number of pages not migrated */
- ret = -ENOMEM;
+ not_migrated = migrate_pages(&movable_page_list, alloc_migration_target,
+ NULL, (unsigned long)&mtc, MIGRATE_SYNC,
+ MR_LONGTERM_PIN, NULL);
+ if (not_migrated > 0) {
+ ret = -ENOMEM;
+ goto error;
}
+ return -EAGAIN;

- if (ret && !list_empty(&movable_page_list))
+error:
+ if (!list_empty(&movable_page_list))
putback_movable_pages(&movable_page_list);
-
- return ret ? ret : -EAGAIN;
+ return ret;
}
#else
static long check_and_migrate_movable_pages(unsigned long nr_pages,