Re: [PATCH v2 09/11] mm/hmm: allow to mirror vma of a file on a DAX backed filesystem v2

From: Ira Weiny
Date: Thu Mar 28 2019 - 22:05:36 EST


On Mon, Mar 25, 2019 at 10:40:09AM -0400, Jerome Glisse wrote:
> From: Jérôme Glisse <jglisse@xxxxxxxxxx>
>
> HMM mirror is a device driver helper to mirror a range of virtual addresses.
> It means that the process jobs running on the device can access the same
> virtual addresses as the CPU threads of that process. This patch adds support
> for mirroring mappings of files that are on a DAX block device (i.e. a range of
> virtual addresses that is an mmap of a file in a filesystem on a DAX block
> device). There is no reason not to support such a case when mirroring virtual
> addresses on a device.
>
> Note that, unlike the GUP code, we do not take a page reference; hence when we
> back off we have nothing to undo.
>
> Changes since v1:
> - improved commit message
> - squashed: Arnd Bergmann: fix unused variable warning in hmm_vma_walk_pud
>
> Signed-off-by: Jérôme Glisse <jglisse@xxxxxxxxxx>
> Reviewed-by: Ralph Campbell <rcampbell@xxxxxxxxxx>
> Cc: Andrew Morton <akpm@xxxxxxxxxxxxxxxxxxxx>
> Cc: Dan Williams <dan.j.williams@xxxxxxxxx>
> Cc: John Hubbard <jhubbard@xxxxxxxxxx>
> Cc: Arnd Bergmann <arnd@xxxxxxxx>
> ---
> mm/hmm.c | 132 ++++++++++++++++++++++++++++++++++++++++++++++---------
> 1 file changed, 111 insertions(+), 21 deletions(-)
>
> diff --git a/mm/hmm.c b/mm/hmm.c
> index 64a33770813b..ce33151c6832 100644
> --- a/mm/hmm.c
> +++ b/mm/hmm.c
> @@ -325,6 +325,7 @@ EXPORT_SYMBOL(hmm_mirror_unregister);
>
> struct hmm_vma_walk {
> struct hmm_range *range;
> + struct dev_pagemap *pgmap;
> unsigned long last;
> bool fault;
> bool block;
> @@ -499,6 +500,15 @@ static inline uint64_t pmd_to_hmm_pfn_flags(struct hmm_range *range, pmd_t pmd)
> range->flags[HMM_PFN_VALID];
> }
>
> +static inline uint64_t pud_to_hmm_pfn_flags(struct hmm_range *range, pud_t pud)
> +{
> + if (!pud_present(pud))
> + return 0;
> + return pud_write(pud) ? range->flags[HMM_PFN_VALID] |
> + range->flags[HMM_PFN_WRITE] :
> + range->flags[HMM_PFN_VALID];
> +}
> +
> static int hmm_vma_handle_pmd(struct mm_walk *walk,
> unsigned long addr,
> unsigned long end,
> @@ -520,8 +530,19 @@ static int hmm_vma_handle_pmd(struct mm_walk *walk,
> return hmm_vma_walk_hole_(addr, end, fault, write_fault, walk);
>
> pfn = pmd_pfn(pmd) + pte_index(addr);
> - for (i = 0; addr < end; addr += PAGE_SIZE, i++, pfn++)
> + for (i = 0; addr < end; addr += PAGE_SIZE, i++, pfn++) {
> + if (pmd_devmap(pmd)) {
> + hmm_vma_walk->pgmap = get_dev_pagemap(pfn,
> + hmm_vma_walk->pgmap);
> + if (unlikely(!hmm_vma_walk->pgmap))
> + return -EBUSY;
> + }
> pfns[i] = hmm_pfn_from_pfn(range, pfn) | cpu_flags;
> + }
> + if (hmm_vma_walk->pgmap) {
> + put_dev_pagemap(hmm_vma_walk->pgmap);
> + hmm_vma_walk->pgmap = NULL;
> + }
> hmm_vma_walk->last = end;
> return 0;
> }
> @@ -608,10 +629,24 @@ static int hmm_vma_handle_pte(struct mm_walk *walk, unsigned long addr,
> if (fault || write_fault)
> goto fault;
>
> + if (pte_devmap(pte)) {
> + hmm_vma_walk->pgmap = get_dev_pagemap(pte_pfn(pte),
> + hmm_vma_walk->pgmap);
> + if (unlikely(!hmm_vma_walk->pgmap))
> + return -EBUSY;
> + } else if (IS_ENABLED(CONFIG_ARCH_HAS_PTE_SPECIAL) && pte_special(pte)) {
> + *pfn = range->values[HMM_PFN_SPECIAL];
> + return -EFAULT;
> + }
> +
> *pfn = hmm_pfn_from_pfn(range, pte_pfn(pte)) | cpu_flags;

<tag>

> return 0;
>
> fault:
> + if (hmm_vma_walk->pgmap) {
> + put_dev_pagemap(hmm_vma_walk->pgmap);
> + hmm_vma_walk->pgmap = NULL;
> + }
> pte_unmap(ptep);
> /* Fault any virtual address we were asked to fault */
> return hmm_vma_walk_hole_(addr, end, fault, write_fault, walk);
> @@ -699,12 +734,83 @@ static int hmm_vma_walk_pmd(pmd_t *pmdp,
> return r;
> }
> }
> + if (hmm_vma_walk->pgmap) {
> + put_dev_pagemap(hmm_vma_walk->pgmap);
> + hmm_vma_walk->pgmap = NULL;
> + }


Why is this here and not in hmm_vma_handle_pte()? Unless I'm just getting
tired, this is the corresponding put for when hmm_vma_handle_pte() returns 0
at <tag> above.

Ira