[PATCH] Simple Page Coloring (2.3.99pre3 diffs)

From: Joseph A. Martin (jmartin@linux08.mro.dec.com)
Date: Tue Apr 11 2000 - 16:24:48 EST


A simple but effective page coloring patch
------------------------------------------
Page coloring is the assignment of physical pages to virtual mappings
in a manner which attempts to avoid cache conflicts. One approach is
to match (modulo the cache size) the addresses of virtual and physical
pages. The approach taken by this patch is to assemble non-colliding,
cache-filling collections of physical pages and then to distribute
them in order as single user pages are requested.

Compared to a virtual-to-physical scheme, this temporal-to-physical
page assignment represents a conscious tradeoff between perfect
function on the one hand and smaller size and complexity on the other.
Of course, someone else may be able to figure out how to have it all.
;-)

The patch consists of only seven files. One is new; the others all
have small changes. It is Alpha-specific but only because I haven't
presumed to change "config.in" or "cache.h" for other architectures.
The changes are easy.

Characteristics (the ones I can think of at the moment):
 - small, self-contained text and data additions to the kernel
 - uses large buddy blocks to assemble "collections" quickly
 - Fragmentation of buddy blocks appears to degrade performance over
   the life of the system, but so far I am surprised at how little;
   performance remains significantly above no coloring.
 - Multiple processes play well together if they don't fill the cache.
 - Any one cache-filling process would likely do better with the other,
   virtual-to-physical match approach.
 - oriented toward physically indexed caches
 - less improvement for highly associative (e.g. Intel) caches
 - compatible with future superpage (e.g. MIPS, Alpha) work

Give it a try on your favorite cache-filling application and let me
know what you think. Thanks.

\Joe
 Joseph.Martin@Compaq.com Compaq Computer Corporation
                                        200 Forest Street MRO1-2/K20
 (508) 467-2100 Marlboro, MA 01752 U.S.A.

--- ../linux-2.3.99-pre3/include/linux/mm.h Thu Mar 23 18:36:06 2000
+++ ../patch/include/linux/mm.h Tue Apr 11 16:27:30 2000
@@ -295,6 +295,38 @@
 extern mem_map_t * mem_map;
 
 /*
+ * GFP bitmasks..
+ */
+#define __GFP_WAIT 0x01
+#define __GFP_HIGH 0x02
+#define __GFP_IO 0x04
+#define __GFP_DMA 0x08
+#ifdef CONFIG_HIGHMEM
+#define __GFP_HIGHMEM 0x10
+#else
+#define __GFP_HIGHMEM 0x0 /* noop */
+#endif
+
+
+#define GFP_BUFFER (__GFP_HIGH | __GFP_WAIT)
+#define GFP_ATOMIC (__GFP_HIGH)
+#define GFP_USER (__GFP_WAIT | __GFP_IO)
+#define GFP_HIGHUSER (GFP_USER | __GFP_HIGHMEM)
+#define GFP_KERNEL (__GFP_HIGH | __GFP_WAIT | __GFP_IO)
+#define GFP_NFS (__GFP_HIGH | __GFP_WAIT | __GFP_IO)
+#define GFP_KSWAPD (__GFP_IO)
+
+/* Flag - indicates that the buffer will be suitable for DMA. Ignored on some
+ platforms, used as appropriate on others */
+
+#define GFP_DMA __GFP_DMA
+
+/* Flag - indicates that the buffer can be taken from high memory which is not
+ permanently map ped by the kernel */
+
+#define GFP_HIGHMEM __GFP_HIGHMEM
+
+/*
  * There is only one page-allocator function, and two main namespaces to
  * it. The alloc_page*() variants return 'struct page *' and as such
  * can allocate highmem pages, the *get*page*() variants return
@@ -320,14 +352,23 @@
 extern struct page * alloc_pages(int gfp_mask, unsigned long order);
 #endif /* !CONFIG_DISCONTIGMEM */
 
+#if defined(CONFIG_PAGE_COLORING)
+extern struct page * _alloc_color_page(void);
+#define alloc_color_page(_MASK_) \
+ ((((_MASK_) & GFP_USER) == GFP_USER) ? _alloc_color_page() : 0)
+#else
+#define alloc_color_page(_MASK_) 0
+#endif
+
 #define alloc_page(gfp_mask) \
- alloc_pages(gfp_mask, 0)
+ (alloc_color_page(gfp_mask) ?: alloc_pages(gfp_mask, 0))
 
 extern inline unsigned long __get_free_pages (int gfp_mask, unsigned long order)
 {
         struct page * page;
 
- page = alloc_pages(gfp_mask, order);
+ if (order > 0 || (page = alloc_color_page(gfp_mask)) == 0)
+ page = alloc_pages(gfp_mask, order);
         if (!page)
                 return 0;
         return page_address(page);
@@ -455,38 +496,6 @@
                         size_t size, unsigned int flags);
 extern struct page *filemap_nopage(struct vm_area_struct * area,
                                     unsigned long address, int no_share);
-
-/*
- * GFP bitmasks..
- */
-#define __GFP_WAIT 0x01
-#define __GFP_HIGH 0x02
-#define __GFP_IO 0x04
-#define __GFP_DMA 0x08
-#ifdef CONFIG_HIGHMEM
-#define __GFP_HIGHMEM 0x10
-#else
-#define __GFP_HIGHMEM 0x0 /* noop */
-#endif
-
-
-#define GFP_BUFFER (__GFP_HIGH | __GFP_WAIT)
-#define GFP_ATOMIC (__GFP_HIGH)
-#define GFP_USER (__GFP_WAIT | __GFP_IO)
-#define GFP_HIGHUSER (GFP_USER | __GFP_HIGHMEM)
-#define GFP_KERNEL (__GFP_HIGH | __GFP_WAIT | __GFP_IO)
-#define GFP_NFS (__GFP_HIGH | __GFP_WAIT | __GFP_IO)
-#define GFP_KSWAPD (__GFP_IO)
-
-/* Flag - indicates that the buffer will be suitable for DMA. Ignored on some
- platforms, used as appropriate on others */
-
-#define GFP_DMA __GFP_DMA
-
-/* Flag - indicates that the buffer can be taken from high memory which is not
- permanently mapped by the kernel */
-
-#define GFP_HIGHMEM __GFP_HIGHMEM
 
 /* vma is the first one with address < vma->vm_end,
  * and even address < vma->vm_start. Have to extend vma. */
--- ../linux-2.3.99-pre3/mm/coloring.c Wed Apr 5 14:52:39 2000
+++ ../patch/mm/coloring.c Tue Apr 11 16:36:46 2000
@@ -0,0 +1,120 @@
+/*
+ * Written by Joseph A. Martin, Compaq Computer Corp., 11 April 2000.
+ */
+#include <linux/mm.h>
+#include <asm/cache.h>
+
+#define MAX(a, b) (((a) > (b)) ? (a) : (b))
+
+#ifndef LN2_MAX_CACHE
+/* If this isn't defined in include/asm/cache.h, default to a guess for i386. */
+#define LN2_MIN_CACHE 19
+#define LN2_MAX_CACHE 22
+#endif
+
+/*
+ * Page coloring supplies single user pages which correspond to
+ * consecutive locations in a physically indexed secondary cache.
+ * The intent is spread application memory evenly through the cache
+ * and thereby improve cache efficiency.
+ *
+ * Page coloring stands the buddy system on its head, allocating the
+ * largest available blocks to cover the secondary cache. If there is
+ * no single block as large as the cache, it recursively assembles a
+ * collection of smaller blocks, which are disjoint modulo the biggest
+ * cache size. Therefore, it helps -- but is not required -- for
+ * MAX_ORDER > LN2_MAX_CACHE - PAGE_SHIFT
+ *
+ * The one subtle thing that is going on here is that multi-page
+ * blocks are being allocated, but single pages are being freed. It
+ * works because the buddy system keeps track of the parity of
+ * allocation, not the count (0, 1, or 2) of buddies free or available.
+ */
+
+/* This locks keeps the single_pages queue consistent. */
+static spinlock_t single_page_lock = SPIN_LOCK_UNLOCKED;
+
+/* This lock single-threads page production. */
+static spinlock_t single_page_producer_lock = SPIN_LOCK_UNLOCKED;
+
+static LIST_HEAD(single_pages);
+
+static inline int
+get_named_block(long order, unsigned long name)
+{
+ unsigned long flags;
+ int status = 0;
+ long match_mask = (-1L << (order+PAGE_SHIFT)) & ~(-1L << LN2_MAX_CACHE);
+ struct page * page;
+ unsigned long addr = 0;
+ struct list_head reject;
+
+ INIT_LIST_HEAD(&reject);
+
+ while ((page = alloc_pages(GFP_USER, order)) != 0) {
+ addr = page_address(page);
+ if (((addr ^ name) & match_mask) == 0)
+ break;
+ else
+ list_add(&page->list, &reject);
+ }
+
+ /* If necessary, recurse to smaller blocks but not too deeply. */
+ status = (addr != 0) ||
+ ((order > MAX(6, LN2_MIN_CACHE - PAGE_SHIFT - 2)) &&
+ (get_named_block(order - 1, name | (1 << (order + PAGE_SHIFT - 1)))
+ | get_named_block(order - 1, name)));
+
+ /*
+ * We hang on to the rejects until now so that the recursion
+ * above won't revisit them by way of splitting larger blocks.
+ */
+ while (!list_empty(&reject)) {
+ struct page *r = (struct page *)reject.next;
+ list_del(&r->list);
+ __free_pages(r, order);
+ }
+
+ if (addr != 0) {
+ int i;
+
+ /* Break up the block into single pages. */
+ spin_lock_irqsave(&single_page_lock, flags);
+ for (i = MAP_NR(addr) + (1<<order) - 1; i>=MAP_NR(addr); --i) {
+ list_add(&mem_map[i].list, &single_pages);
+ set_page_count(&mem_map[i], 1);
+ }
+ spin_unlock_irqrestore(&single_page_lock, flags);
+ }
+
+ return status;
+}
+
+struct page *
+_alloc_color_page(void)
+{
+ unsigned long flags;
+ struct page * page;
+
+ spin_lock_irqsave(&single_page_lock, flags);
+
+ while (list_empty(&single_pages)) {
+ int status = 0;
+
+ spin_unlock_irqrestore(&single_page_lock, flags);
+ if (spin_trylock(&single_page_producer_lock)) {
+ status = get_named_block(LN2_MAX_CACHE - PAGE_SHIFT, 0);
+ spin_unlock(&single_page_producer_lock);
+ }
+ if (status == 0) {
+ /* Failover to alloc_pages(GFP_USER, 0) */
+ return (struct page *)0;
+ }
+ spin_lock_irqsave(&single_page_lock, flags);
+ }
+
+ page = (struct page *)single_pages.next;
+ list_del(&page->list);
+ spin_unlock_irqrestore(&single_page_lock, flags);
+ return page;
+}
--- ../linux-2.3.99-pre3/mm/Makefile Mon Dec 6 13:14:13 1999
+++ ../patch/mm/Makefile Tue Apr 4 16:23:58 2000
@@ -16,4 +16,8 @@
 O_OBJS += highmem.o
 endif
 
+ifeq ($(CONFIG_PAGE_COLORING),y)
+O_OBJS += coloring.o
+endif
+
 include $(TOPDIR)/Rules.make
--- ../linux-2.3.99-pre3/include/asm-alpha/cache.h Mon Dec 6 20:15:53 1999
+++ ../patch/include/asm-alpha/cache.h Tue Apr 4 16:23:58 2000
@@ -10,4 +10,7 @@
 #define L1_CACHE_ALIGN(x) (((x)+(L1_CACHE_BYTES-1))&~(L1_CACHE_BYTES-1))
 #define SMP_CACHE_BYTES L1_CACHE_BYTES
 
+/* Size range for the secondary cache of the "latest and greatest" (EV6) */
+#define LN2_MIN_CACHE 21
+#define LN2_MAX_CACHE 24
 #endif
--- ../linux-2.3.99-pre3/kernel/ksyms.c Thu Mar 23 01:15:57 2000
+++ ../patch/kernel/ksyms.c Wed Apr 5 14:33:11 2000
@@ -97,6 +97,9 @@
 EXPORT_SYMBOL(__alloc_pages);
 EXPORT_SYMBOL(alloc_pages_node);
 EXPORT_SYMBOL(__free_pages_ok);
+#ifdef CONFIG_PAGE_COLORING
+EXPORT_SYMBOL(_alloc_color_page);
+#endif
 #ifndef CONFIG_DISCONTIGMEM
 EXPORT_SYMBOL(contig_page_data);
 #endif
--- ../linux-2.3.99-pre3/arch/alpha/config.in Thu Mar 23 17:45:04 2000
+++ ../patch/arch/alpha/config.in Wed Apr 5 12:01:40 2000
@@ -183,6 +183,10 @@
         bool 'Symmetric multi-processing support' CONFIG_SMP
 fi
 
+if [ "$CONFIG_EXPERIMENTAL" = "y" ]; then
+ bool 'Secondary cache optimization' CONFIG_PAGE_COLORING
+fi
+
 source drivers/pci/Config.in
 
 bool 'Support for hot-pluggable devices' CONFIG_HOTPLUG
--- ../linux-2.3.99-pre3/Documentation/Configure.help Thu Mar 23 11:38:57 2000
+++ ../patch/Documentation/Configure.help Tue Apr 4 16:23:58 2000
@@ -2066,6 +2066,14 @@
   SRM is modified. If you say Y, the existing PCI configuration will
   be left intact.
 
+Secondary cache optimization
+CONFIG_PAGE_COLORING
+ If possible, consecutive requests for single user pages are filled
+ with pages physically consecutive modulo the largest secondary cache
+ size. This reduces the amount of secondary cache conflict within an
+ application and is most useful for applications using large amounts
+ of memory for data.
+
 Non-standard serial port support
 CONFIG_SERIAL_NONSTANDARD
   Say Y here if you have any non-standard serial boards -- boards

-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@vger.rutgers.edu
Please read the FAQ at http://www.tux.org/lkml/



This archive was generated by hypermail 2b29 : Sat Apr 15 2000 - 21:00:16 EST