[RFC PATCH 4/5] mm/hmm: add support for peer to peer to HMM device memory

From: jglisse
Date: Tue Jan 29 2019 - 12:48:00 EST


From: Jérôme Glisse <jglisse@xxxxxxxxxx>

Signed-off-by: Jérôme Glisse <jglisse@xxxxxxxxxx>
Cc: Logan Gunthorpe <logang@xxxxxxxxxxxx>
Cc: Greg Kroah-Hartman <gregkh@xxxxxxxxxxxxxxxxxxx>
Cc: Rafael J. Wysocki <rafael@xxxxxxxxxx>
Cc: Bjorn Helgaas <bhelgaas@xxxxxxxxxx>
Cc: Christian Koenig <christian.koenig@xxxxxxx>
Cc: Felix Kuehling <Felix.Kuehling@xxxxxxx>
Cc: Jason Gunthorpe <jgg@xxxxxxxxxxxx>
Cc: linux-pci@xxxxxxxxxxxxxxx
Cc: dri-devel@xxxxxxxxxxxxxxxxxxxxx
Cc: Christoph Hellwig <hch@xxxxxx>
Cc: Marek Szyprowski <m.szyprowski@xxxxxxxxxxx>
Cc: Robin Murphy <robin.murphy@xxxxxxx>
Cc: Joerg Roedel <jroedel@xxxxxxx>
Cc: iommu@xxxxxxxxxxxxxxxxxxxxxxxxxx
---
include/linux/hmm.h | 47 +++++++++++++++++++++++++++++++++
mm/hmm.c | 63 +++++++++++++++++++++++++++++++++++++++++----
2 files changed, 105 insertions(+), 5 deletions(-)

diff --git a/include/linux/hmm.h b/include/linux/hmm.h
index 4a1454e3efba..7a3ac182cc48 100644
--- a/include/linux/hmm.h
+++ b/include/linux/hmm.h
@@ -710,6 +710,53 @@ struct hmm_devmem_ops {
const struct page *page,
unsigned int flags,
pmd_t *pmdp);
+
+ /*
+ * p2p_map() - map pages for peer to peer access between devices
+ * @devmem: device memory structure (see struct hmm_devmem)
+ * @range: range of virtual addresses that is being mapped
+ * @device: device the range is being mapped to
+ * @addr: first virtual address in the range to consider
+ * @pas: device addresses (where the actual mappings are stored)
+ * Returns: number of pages successfully mapped, 0 otherwise
+ *
+ * Map pages belonging to devmem to another device for peer to peer
+ * access. The device can decide not to map, in which case the memory
+ * will be migrated to main memory.
+ *
+ * Also there is no guarantee that all the pages in the range belong
+ * to the devmem, so it is up to the function to check that every
+ * single page does belong to devmem.
+ *
+ * Note that for now we do not care about the exact error, so on
+ * failure the function should just return 0.
+ */
+ long (*p2p_map)(struct hmm_devmem *devmem,
+ struct hmm_range *range,
+ struct device *device,
+ unsigned long addr,
+ dma_addr_t *pas);
+
+ /*
+ * p2p_unmap() - unmap pages from peer to peer access between devices
+ * @devmem: device memory structure (see struct hmm_devmem)
+ * @range: range of virtual addresses that is being unmapped
+ * @device: device the range was mapped to
+ * @addr: first virtual address in the range to consider
+ * @pas: device addresses (where the actual mappings are stored)
+ * Returns: number of pages successfully unmapped, 0 otherwise
+ *
+ * Unmap pages belonging to devmem previously mapped with p2p_map().
+ *
+ * Note there is no guarantee that all the pages in the range belong
+ * to the devmem, so it is up to the function to check that every
+ * single page does belong to devmem.
+ */
+ unsigned long (*p2p_unmap)(struct hmm_devmem *devmem,
+ struct hmm_range *range,
+ struct device *device,
+ unsigned long addr,
+ dma_addr_t *pas);
};

/*
diff --git a/mm/hmm.c b/mm/hmm.c
index 1a444885404e..fd49b1e116d0 100644
--- a/mm/hmm.c
+++ b/mm/hmm.c
@@ -1193,16 +1193,19 @@ long hmm_range_dma_map(struct hmm_range *range,
dma_addr_t *daddrs,
bool block)
{
- unsigned long i, npages, mapped, page_size;
+ unsigned long i, npages, mapped, page_size, addr;
long ret;

+again:
ret = hmm_range_fault(range, block);
if (ret <= 0)
return ret ? ret : -EBUSY;

+ mapped = 0;
+ addr = range->start;
page_size = hmm_range_page_size(range);
npages = (range->end - range->start) >> range->page_shift;
- for (i = 0, mapped = 0; i < npages; ++i) {
+ for (i = 0; i < npages; ++i, addr += page_size) {
enum dma_data_direction dir = DMA_FROM_DEVICE;
struct page *page;

@@ -1226,6 +1229,29 @@ long hmm_range_dma_map(struct hmm_range *range,
goto unmap;
}

+ if (is_device_private_page(page)) {
+ struct hmm_devmem *devmem = page->pgmap->data;
+
+ if (!devmem->ops->p2p_map || !devmem->ops->p2p_unmap) {
+ /* Fall-back to main memory. */
+ range->default_flags |=
+ range->flags[HMM_PFN_DEVICE_PRIVATE];
+ goto again;
+ }
+
+ ret = devmem->ops->p2p_map(devmem, range, device,
+ addr, daddrs);
+ if (ret <= 0) {
+ /* Fall-back to main memory. */
+ range->default_flags |=
+ range->flags[HMM_PFN_DEVICE_PRIVATE];
+ goto again;
+ }
+ mapped += ret;
+ i += ret;
+ continue;
+ }
+
/* If it is read and write than map bi-directional. */
if (range->pfns[i] & range->values[HMM_PFN_WRITE])
dir = DMA_BIDIRECTIONAL;
@@ -1242,7 +1268,9 @@ long hmm_range_dma_map(struct hmm_range *range,
return mapped;

unmap:
- for (npages = i, i = 0; (i < npages) && mapped; ++i) {
+ npages = i;
+ addr = range->start;
+ for (i = 0; (i < npages) && mapped; ++i, addr += page_size) {
enum dma_data_direction dir = DMA_FROM_DEVICE;
struct page *page;

@@ -1253,6 +1281,18 @@ long hmm_range_dma_map(struct hmm_range *range,
if (dma_mapping_error(device, daddrs[i]))
continue;

+ if (is_device_private_page(page)) {
+ struct hmm_devmem *devmem = page->pgmap->data;
+ unsigned long inc;
+
+ inc = devmem->ops->p2p_unmap(devmem, range, device,
+ addr, &daddrs[i]);
+ BUG_ON(inc > npages);
+ mapped += inc;
+ i += inc;
+ continue;
+ }
+
/* If it is read and write than map bi-directional. */
if (range->pfns[i] & range->values[HMM_PFN_WRITE])
dir = DMA_BIDIRECTIONAL;
@@ -1285,7 +1325,7 @@ long hmm_range_dma_unmap(struct hmm_range *range,
dma_addr_t *daddrs,
bool dirty)
{
- unsigned long i, npages, page_size;
+ unsigned long i, npages, page_size, addr;
long cpages = 0;

/* Sanity check. */
@@ -1298,7 +1338,7 @@ long hmm_range_dma_unmap(struct hmm_range *range,

page_size = hmm_range_page_size(range);
npages = (range->end - range->start) >> range->page_shift;
- for (i = 0; i < npages; ++i) {
+ for (i = 0, addr = range->start; i < npages; ++i, addr += page_size) {
enum dma_data_direction dir = DMA_FROM_DEVICE;
struct page *page;

@@ -1318,6 +1358,19 @@ long hmm_range_dma_unmap(struct hmm_range *range,
set_page_dirty(page);
}

+ if (is_device_private_page(page)) {
+ struct hmm_devmem *devmem = page->pgmap->data;
+ unsigned long ret;
+
+ BUG_ON(!devmem->ops->p2p_unmap);
+
+ ret = devmem->ops->p2p_unmap(devmem, range, device,
+ addr, &daddrs[i]);
+ BUG_ON(ret > npages);
+ i += ret;
+ continue;
+ }
+
/* Unmap and clear pfns/dma address */
dma_unmap_page(device, daddrs[i], page_size, dir);
range->pfns[i] = range->values[HMM_PFN_NONE];
--
2.17.2