[PATCH] mm/device-public-memory: device memory cache coherent with CPU

From: Jérôme Glisse
Date: Thu Jun 01 2017 - 11:25:59 EST


Platforms with an advanced system bus (like CAPI or CCIX) allow device
memory to be accessible from the CPU in a cache-coherent fashion. Add
a new type of ZONE_DEVICE to represent such memory. The use cases
are the same as for un-addressable device memory, but without
all the corner cases.

Signed-off-by: Jérôme Glisse <jglisse@xxxxxxxxxx>
---
include/linux/ioport.h | 1 +
include/linux/memremap.h | 21 +++++++++++++++++++++
mm/Kconfig | 13 +++++++++++++
mm/memory.c | 13 +++++++++++++
mm/migrate.c | 23 ++++++++++++++---------
5 files changed, 62 insertions(+), 9 deletions(-)

diff --git a/include/linux/ioport.h b/include/linux/ioport.h
index 3a4f691..f5cf32e 100644
--- a/include/linux/ioport.h
+++ b/include/linux/ioport.h
@@ -131,6 +131,7 @@ enum {
IORES_DESC_PERSISTENT_MEMORY = 4,
IORES_DESC_PERSISTENT_MEMORY_LEGACY = 5,
IORES_DESC_DEVICE_PRIVATE_MEMORY = 6,
+ IORES_DESC_DEVICE_PUBLIC_MEMORY = 7,
};

/* helpers to define resources */
diff --git a/include/linux/memremap.h b/include/linux/memremap.h
index 0e0d2e6..b9f460a 100644
--- a/include/linux/memremap.h
+++ b/include/linux/memremap.h
@@ -56,10 +56,18 @@ static inline struct vmem_altmap *to_vmem_altmap(unsigned long memmap_start)
* page must be treated as an opaque object, rather than a "normal" struct page.
* A more complete discussion of unaddressable memory may be found in
* include/linux/hmm.h and Documentation/vm/hmm.txt.
+ *
+ * MEMORY_DEVICE_PUBLIC:
+ * Device memory that is cache coherent from device and CPU point of view. This
+ * is used on platforms that have an advanced system bus (like CAPI or CCIX). A
+ * driver can hotplug the device memory using ZONE_DEVICE and with that memory
+ * type. Any page of a process can be migrated to such memory. However no one
+ * should be allowed to pin such memory so that it can always be evicted.
*/
enum memory_type {
MEMORY_DEVICE_PERSISTENT = 0,
MEMORY_DEVICE_PRIVATE,
+ MEMORY_DEVICE_PUBLIC,
};

/*
@@ -91,6 +99,8 @@ enum memory_type {
* The page_free() callback is called once the page refcount reaches 1
* (ZONE_DEVICE pages never reach 0 refcount unless there is a refcount bug.
* This allows the device driver to implement its own memory management.)
+ *
+ * For MEMORY_DEVICE_PUBLIC only the page_free() callback matters.
*/
typedef int (*dev_page_fault_t)(struct vm_area_struct *vma,
unsigned long addr,
@@ -133,6 +143,12 @@ static inline bool is_device_private_page(const struct page *page)
return is_zone_device_page(page) &&
page->pgmap->type == MEMORY_DEVICE_PRIVATE;
}
+
+static inline bool is_device_public_page(const struct page *page)
+{
+ return is_zone_device_page(page) &&
+ page->pgmap->type == MEMORY_DEVICE_PUBLIC;
+}
#else
static inline void *devm_memremap_pages(struct device *dev,
struct resource *res, struct percpu_ref *ref,
@@ -156,6 +172,11 @@ static inline bool is_device_private_page(const struct page *page)
{
return false;
}
+
+static inline bool is_device_public_page(const struct page *page)
+{
+ return false;
+}
#endif

/**
diff --git a/mm/Kconfig b/mm/Kconfig
index 46296d5d7..bacb193 100644
--- a/mm/Kconfig
+++ b/mm/Kconfig
@@ -758,6 +758,19 @@ config DEVICE_PRIVATE
memory; i.e., memory that is only accessible from the device (or
group of devices).

+config DEVICE_PUBLIC
+ bool "Addressable device memory (like GPU memory)"
+ depends on X86_64
+ depends on ZONE_DEVICE
+ depends on MEMORY_HOTPLUG
+ depends on MEMORY_HOTREMOVE
+ depends on SPARSEMEM_VMEMMAP
+
+ help
+ Allows creation of struct pages to represent addressable device
+ memory; i.e., memory that is accessible from both the device and
+ the CPU.
+
config FRAME_VECTOR
bool

diff --git a/mm/memory.c b/mm/memory.c
index eba61dd..d192f3d 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -983,6 +983,19 @@ copy_one_pte(struct mm_struct *dst_mm, struct mm_struct *src_mm,
get_page(page);
page_dup_rmap(page, false);
rss[mm_counter(page)]++;
+ } else if (pte_devmap(pte)) {
+ page = pte_page(pte);
+
+ /*
+ * Cache coherent device memory behaves like a regular page and
+ * not like a persistent memory page. For more information see
+ * MEMORY_DEVICE_PUBLIC in memremap.h
+ */
+ if (is_device_public_page(page)) {
+ get_page(page);
+ page_dup_rmap(page, false);
+ rss[mm_counter(page)]++;
+ }
}

out_set_pte:
diff --git a/mm/migrate.c b/mm/migrate.c
index d7c4db6..a0115b8 100644
--- a/mm/migrate.c
+++ b/mm/migrate.c
@@ -229,12 +229,16 @@ static bool remove_migration_pte(struct page *page, struct vm_area_struct *vma,
if (is_write_migration_entry(entry))
pte = maybe_mkwrite(pte, vma);

- if (unlikely(is_zone_device_page(new)) &&
- is_device_private_page(new)) {
- entry = make_device_private_entry(new, pte_write(pte));
- pte = swp_entry_to_pte(entry);
- if (pte_swp_soft_dirty(*pvmw.pte))
- pte = pte_mksoft_dirty(pte);
+ if (unlikely(is_zone_device_page(new))) {
+ if (is_device_private_page(new)) {
+ entry = make_device_private_entry(new, pte_write(pte));
+ pte = swp_entry_to_pte(entry);
+ if (pte_swp_soft_dirty(*pvmw.pte))
+ pte = pte_mksoft_dirty(pte);
+ } else if (is_device_public_page(new)) {
+ pte = pte_mkdevmap(pte);
+ flush_dcache_page(new);
+ }
} else
flush_dcache_page(new);

@@ -2300,9 +2304,10 @@ static bool migrate_vma_check_page(struct page *page)

/* Page from ZONE_DEVICE have one extra reference */
if (is_zone_device_page(page)) {
- if (is_device_private_page(page)) {
+ if (is_device_private_page(page) ||
+ is_device_public_page(page))
extra++;
- } else
+ else
/* Other ZONE_DEVICE memory type are not supported */
return false;
}
@@ -2621,7 +2626,7 @@ static void migrate_vma_pages(struct migrate_vma *migrate)
migrate->src[i] &= ~MIGRATE_PFN_MIGRATE;
continue;
}
- } else {
+ } else if (!is_device_public_page(newpage)) {
/*
* Other types of ZONE_DEVICE page are not
* supported.
--
2.4.11


--EVF5PPMfhYS0aIcm--