Re: lockless poll() (was Re: namei() query)

From: Manfred Spraul (manfreds@colorfullife.com)
Date: Mon Apr 24 2000 - 14:54:43 EST


Linus Torvalds wrote:
>
> you won't get unbalanced spikes like you get with allocating and
> freeing a whole address space when a process is created and dies on
> different CPU's).
>
client process sends a 100 byte request with SysV msg to server, and
server returns 200 bytes :-/

Anyway, I've attached my 2 patches:
* use kmalloc(PAGE_SIZE) instead of get_free_page():
        select, file table allocations and getname.

* optimize kmalloc for fixed size allocation, but this had virtually no
impact on my benchmarks.

// $Header$
// Kernel Version:
// VERSION = 2
// PATCHLEVEL = 3
// SUBLEVEL = 99
// EXTRAVERSION = -pre5
--- 2.3/include/linux/fs.h Wed Apr 12 15:00:31 2000
+++ build-2.3/include/linux/fs.h Mon Apr 24 21:23:51 2000
@@ -847,8 +847,8 @@
 extern struct file * dentry_open(struct dentry *, struct vfsmount *, int);
 extern int filp_close(struct file *, fl_owner_t id);
 extern char * getname(const char *);
-#define __getname() ((char *) __get_free_page(GFP_KERNEL))
-#define putname(name) free_page((unsigned long)(name))
+#define __getname() ((char *) kmalloc(PAGE_SIZE, GFP_KERNEL))
+#define putname(name) kfree((name))
 
 enum {BDEV_FILE, BDEV_SWAP, BDEV_FS, BDEV_RAW};
 extern void kill_fasync(struct fasync_struct *, int, int);
--- 2.3/fs/open.c Wed Apr 12 15:00:28 2000
+++ build-2.3/fs/open.c Mon Apr 24 21:31:22 2000
@@ -10,6 +10,8 @@
 #include <linux/file.h>
 #include <linux/smp_lock.h>
 #include <linux/quotaops.h>
+#include <linux/slab.h>
+
 
 #include <asm/uaccess.h>
 
--- 2.3/fs/select.c Thu Feb 10 22:39:09 2000
+++ build-2.3/fs/select.c Mon Apr 24 21:34:32 2000
@@ -48,7 +48,7 @@
         poll_table* out;
         poll_table* walk;
 
- out = (poll_table *) __get_free_page(GFP_KERNEL);
+ out = (poll_table *) kmalloc(PAGE_SIZE,GFP_KERNEL);
         if(out==NULL)
                 return NULL;
         out->nr = 0;
@@ -57,11 +57,11 @@
         nfds -=__MAX_POLL_TABLE_ENTRIES;
         walk = out;
         while(nfds > 0) {
- poll_table *tmp = (poll_table *) __get_free_page(GFP_KERNEL);
+ poll_table *tmp = (poll_table *) kmalloc(PAGE_SIZE,GFP_KERNEL);
                 if (!tmp) {
                         while(out != NULL) {
                                 tmp = out->next;
- free_page((unsigned long)out);
+ kfree((unsigned long)out);
                                 out = tmp;
                         }
                         return NULL;
@@ -440,14 +440,14 @@
         nchunks = 0;
         nleft = nfds;
         while (nleft > POLLFD_PER_PAGE) { /* allocate complete PAGE_SIZE chunks */
- fds[nchunks] = (struct pollfd *)__get_free_page(GFP_KERNEL);
+ fds[nchunks] = (struct pollfd *)kmalloc(PAGE_SIZE, GFP_KERNEL);
                 if (fds[nchunks] == NULL)
                         goto out_fds;
                 nchunks++;
                 nleft -= POLLFD_PER_PAGE;
         }
         if (nleft) { /* allocate last PAGE_SIZE chunk, only nleft elements used */
- fds[nchunks] = (struct pollfd *)__get_free_page(GFP_KERNEL);
+ fds[nchunks] = (struct pollfd *)kmalloc(PAGE_SIZE, GFP_KERNEL);
                 if (fds[nchunks] == NULL)
                         goto out_fds;
         }
@@ -480,10 +480,10 @@
 
 out_fds1:
         if (nleft)
- free_page((unsigned long)(fds[nchunks]));
+ kfree((unsigned long)(fds[nchunks]));
 out_fds:
         for (i=0; i < nchunks; i++)
- free_page((unsigned long)(fds[i]));
+ kfree((unsigned long)(fds[i]));
         if (nfds != 0)
                 kfree(fds);
 out:
--- 2.3/fs/file.c Mon Nov 8 16:26:39 1999
+++ build-2.3/fs/file.c Mon Apr 24 21:36:43 2000
@@ -24,10 +24,8 @@
         struct file **new_fds;
         int size = num * sizeof(struct file *);
 
- if (size < PAGE_SIZE)
+ if (size <= PAGE_SIZE)
                 new_fds = (struct file **) kmalloc(size, GFP_KERNEL);
- else if (size == PAGE_SIZE)
- new_fds = (struct file **) __get_free_page(GFP_KERNEL);
         else
                 new_fds = (struct file **) vmalloc(size);
         return new_fds;
@@ -44,10 +42,8 @@
 
         if (num <= NR_OPEN_DEFAULT) /* Don't free the embedded fd array! */
                 return;
- else if (size < PAGE_SIZE)
+ else if (size <= PAGE_SIZE)
                 kfree(array);
- else if (size == PAGE_SIZE)
- free_page((unsigned long) array);
         else
                 vfree(array);
 }
@@ -137,11 +133,9 @@
         fd_set *new_fdset;
         int size = num / 8;
 
- if (size < PAGE_SIZE)
+ if (size <= PAGE_SIZE)
                 new_fdset = (fd_set *) kmalloc(size, GFP_KERNEL);
- else if (size == PAGE_SIZE)
- new_fdset = (fd_set *) __get_free_page(GFP_KERNEL);
- else
+ else
                 new_fdset = (fd_set *) vmalloc(size);
         return new_fdset;
 }
@@ -157,10 +151,8 @@
         
         if (num <= __FD_SETSIZE) /* Don't free an embedded fdset */
                 return;
- else if (size < PAGE_SIZE)
+ else if (size <= PAGE_SIZE)
                 kfree(array);
- else if (size == PAGE_SIZE)
- free_page((unsigned long) array);
         else
                 vfree(array);
 }


// $Header$
// Kernel Version:
// VERSION = 2
// PATCHLEVEL = 3
// SUBLEVEL = 99
// EXTRAVERSION = -pre5
--- 2.3/kernel/ksyms.c Wed Apr 12 15:00:33 2000
+++ build-2.3/kernel/ksyms.c Mon Apr 24 19:42:22 2000
@@ -107,7 +107,8 @@
 EXPORT_SYMBOL(kmem_cache_shrink);
 EXPORT_SYMBOL(kmem_cache_alloc);
 EXPORT_SYMBOL(kmem_cache_free);
-EXPORT_SYMBOL(kmalloc);
+EXPORT_SYMBOL(cache_sizes);
+EXPORT_SYMBOL(__kmalloc);
 EXPORT_SYMBOL(kfree);
 EXPORT_SYMBOL(kfree_s);
 EXPORT_SYMBOL(vmalloc);
--- 2.3/mm/slab.c Wed Apr 12 15:00:33 2000
+++ build-2.3/mm/slab.c Mon Apr 24 19:38:59 2000
@@ -324,13 +324,8 @@
 #define SLAB_SET_PAGE_SLAB(pg,x) ((pg)->list.prev = (struct list_head *)(x))
 #define SLAB_GET_PAGE_SLAB(pg) ((kmem_slab_t *)(pg)->list.prev)
 
-/* Size description struct for general caches. */
-typedef struct cache_sizes {
- size_t cs_size;
- kmem_cache_t *cs_cachep;
-} cache_sizes_t;
-
-static cache_sizes_t cache_sizes[] = {
+/* these values are hardcoded in <linux/kmalloc.h> */
+cache_sizes_t cache_sizes[] = {
 #if PAGE_SIZE == 4096
         { 32, NULL},
 #endif
@@ -1680,7 +1675,7 @@
 }
 
 void *
-kmalloc(size_t size, int flags)
+__kmalloc(size_t size, int flags)
 {
         cache_sizes_t *csizep = cache_sizes;
 
--- 2.3/include/linux/slab.h Sat Feb 12 20:42:24 2000
+++ build-2.3/include/linux/slab.h Mon Apr 24 21:28:28 2000
@@ -56,10 +56,6 @@
 extern void *kmem_cache_alloc(kmem_cache_t *, int);
 extern void kmem_cache_free(kmem_cache_t *, void *);
 
-extern void *kmalloc(size_t, int);
-extern void kfree(const void *);
-extern void kfree_s(const void *, size_t);
-
 extern void kmem_cache_reap(int);
 extern int get_slabinfo(char *);
 
@@ -67,6 +63,56 @@
 extern kmem_cache_t *vm_area_cachep;
 extern kmem_cache_t *mm_cachep;
 
-#endif /* __KERNEL__ */
+/* generic kmalloc */
+extern void *__kmalloc(size_t, int);
+extern void kfree(const void *);
+extern void kfree_s(const void *, size_t);
 
+/* Size description struct for general caches. */
+typedef struct cache_sizes {
+ size_t cs_size;
+ kmem_cache_t *cs_cachep;
+} cache_sizes_t;
+
+extern cache_sizes_t cache_sizes[];
+extern void __you_cannot_kmalloc_more_than_128_kilo_bytes(void);
+
+static inline void* __constant_kmalloc(size_t size, int flags)
+{
+#if PAGE_SIZE == 4096
+ if(size < 32)
+ return kmem_cache_alloc(cache_sizes[0].cs_cachep, flags);
+#define KSHIFT 0
+#else
+#define KSHIFT 1
+#endif
+#define FIXED_ALLOC(len,off) \
+ if(size < len) \
+ return kmem_cache_alloc(cache_sizes[off-KSHIFT].cs_cachep, flags)
+ FIXED_ALLOC(64,1);
+ FIXED_ALLOC(128,2);
+ FIXED_ALLOC(256,3);
+ FIXED_ALLOC(512,4);
+ FIXED_ALLOC(1024,5);
+ FIXED_ALLOC(2048,6);
+ FIXED_ALLOC(4096,7);
+ FIXED_ALLOC(8192,8);
+ FIXED_ALLOC(16384,9);
+ FIXED_ALLOC(32768,10);
+ FIXED_ALLOC(65536,11);
+ FIXED_ALLOC(131072,12);
+#undef FIXED_ALLOC
+#undef KSHIFT
+ __you_cannot_kmalloc_more_than_128_kilo_bytes();
+ return NULL;
+}
+
+extern void *kmem_cache_alloc(kmem_cache_t *, int);
+
+#define kmalloc(size, flags) \
+ (__builtin_constant_p(size) ? \
+ __constant_kmalloc((size),(flags)) : \
+ __kmalloc(size,flags))
+
+#endif /* __KERNEL__ */
 #endif /* _LINUX_SLAB_H */

-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@vger.rutgers.edu
Please read the FAQ at http://www.tux.org/lkml/



This archive was generated by hypermail 2b29 : Sun Apr 30 2000 - 21:00:08 EST