updated inode management for 2.1.57

Bill Hawes (whawes@star.net)
Sat, 27 Sep 1997 14:32:15 -0400


This is a multi-part message in MIME format.
--------------E35572AF69BF2F3F9DEFDAA0
Content-Type: text/plain; charset=us-ascii
Content-Transfer-Encoding: 7bit

The attached patch makes some further changes in inode memory
management. If you leave the INODE_SLAB symbol defined, it implements
an alternative approach, suggested by Kevin Buhr, that uses a SLAB cache
for inodes. This allows a shrink_inodes() function to be implemented so
that kswapd can reclaim inode memory when system memory is low.

The shrink_inodes() function effectively provides a safe mechanism for
shrinking dcache memory from kswapd. Each time shrink_inodes() removes
inodes from the unused list, the VFS layer will attempt to restock the
unused list, pruning the dcache if necessary. This work is done by user
processes rather than kswapd, so there's no danger of recursive loops or
deadlocks.

If you comment out the INODE_SLAB definition, the patch provides for
slightly more aggressive reclaiming of inodes before allocating
additional memory. I've set the goal to be proportional to the number
of inodes, so as you increase max_inodes the goal scales progressively.
If the nominal inode limit is exceeded, the code sets the goal to search
the entire inode list for reclaimable inodes.

I'm hoping that some people will test each of the alternatives, and
maybe do some benchmarking to see which way works better.

Regards,
Bill
--------------E35572AF69BF2F3F9DEFDAA0
Content-Type: text/plain; charset=us-ascii; name="inode_mem57-patch"
Content-Transfer-Encoding: 7bit
Content-Disposition: inline; filename="inode_mem57-patch"

--- linux-2.1.57/fs/inode.c.old Sat Sep 20 08:16:14 1997
+++ linux-2.1.57/fs/inode.c Sat Sep 27 13:59:41 1997
@@ -18,6 +18,17 @@
* Famous last words.
*/

+#define INODE_PARANOIA 1
+#define INODE_DEBUG 1
+
+#define INODE_SLAB 1
+#ifdef INODE_SLAB
+#include <linux/slab.h>
+static kmem_cache_t *inode_cachep;
+#endif
+
+extern void shrink_inodes(void); /* move to fs.h */
+
/*
* Inode lookup is no longer as critical as it used to be:
* most of the lookups are going to be through the dcache.
@@ -356,20 +393,29 @@
*/
static struct inode * grow_inodes(void)
{
+#ifndef INODE_SLAB
struct inode * inode;

/*
* Check whether to shrink the dcache ... if we've
* allocated more than half of the nominal maximum,
- * try shrinking before allocating more.
+ * try shrinking before allocating more.
*/
if (inodes_stat.nr_inodes >= (max_inodes >> 1)) {
struct list_head * tmp;
+ int goal = inodes_stat.nr_inodes >> 3;
+
+ /*
+ * If we're past the nominal inode limit,
+ * set the goal to examine _all_ inodes.
+ */
+ if (inodes_stat.nr_inodes >= max_inodes)
+ goal = inodes_stat.nr_inodes >> 1;

spin_unlock(&inode_lock);
- prune_dcache(128);
+ prune_dcache(goal);
spin_lock(&inode_lock);
- try_to_free_inodes(128);
+ try_to_free_inodes(goal);
tmp = inode_unused.next;
if (tmp != &inode_unused) {
inodes_stat.nr_free_inodes--;
@@ -399,6 +445,90 @@
inodes_stat.nr_inodes += PAGE_SIZE / sizeof(struct inode);
}
return inode;
+
+#else /* INODE_SLAB */
+ /*
+ * Use the SLAB allocator for inodes.
+ */
+ static unsigned long last_fail_time = 0;
+ struct inode * inode;
+ struct list_head * tmp;
+
+ spin_unlock(&inode_lock);
+ inode = kmem_cache_alloc(inode_cachep, SLAB_KERNEL);
+ if (inode) {
+ init_once(inode);
+ spin_lock(&inode_lock);
+ inodes_stat.nr_inodes++;
+ return inode;
+ }
+
+ /*
+ * No free memory? Shrink the dcache and try to free an inode.
+ */
+ shrink_dcache();
+ spin_lock(&inode_lock);
+ try_to_free_inodes(inodes_stat.nr_inodes >> 1);
+ tmp = inode_unused.next;
+ if (tmp != &inode_unused) {
+ inodes_stat.nr_free_inodes--;
+ list_del(tmp);
+ inode = list_entry(tmp, struct inode, i_list);
+ return inode;
+ }
+ spin_unlock(&inode_lock);
+
+ /*
+ * Allocation failed ... check whether to report failure.
+ */
+ if (jiffies - last_fail_time > 10*HZ) {
+ last_fail_time = jiffies;
+ printk("grow_inodes: %d inodes, failed to grow\n",
+ inodes_stat.nr_inodes);
+ }
+ return NULL;
+#endif
+}
+
+/*
+ * Try to free some inodes from the unused list. Unused
+ * inodes are guaranteed to be "clean", so we can free
+ * them with no side effects.
+ *
+ * Note that this provides a _safe_ mechanism to reduce
+ * dcache memory from kswapd. Repeatedly freeing unused
+ * inodes will result in pruning the dcache to free more
+ * inodes, effectively draining the dcache in response to
+ * system memory needs. Since the work is being done by
+ * user processes, there's no danger of recursion loops
+ * or deadlock.
+ */
+void shrink_inodes(void)
+{
+#ifdef INODE_SLAB
+ int count = inodes_stat.nr_free_inodes >> 1;
+
+ if (count) {
+ struct list_head * tmp;
+#ifdef INODE_DEBUG
+printk("shrink_inodes: freeing %d\n", count);
+#endif
+ spin_lock(&inode_lock);
+ while ((tmp = inode_unused.next) != &inode_unused) {
+ struct inode * inode;
+ list_del(tmp);
+ inode = list_entry(tmp, struct inode, i_list);
+ inodes_stat.nr_free_inodes--;
+ inodes_stat.nr_inodes--;
+ spin_unlock(&inode_lock);
+ kmem_cache_free(inode_cachep, inode);
+ spin_lock(&inode_lock);
+ if (!--count)
+ break;
+ }
+ spin_unlock(&inode_lock);
+ }
+#endif
}

/*
@@ -657,6 +825,16 @@
head++;
i--;
} while (i);
+
+#ifdef INODE_SLAB
+ /*
+ * Create the inode SLAB cache.
+ */
+ inode_cachep = kmem_cache_create("inode", sizeof(struct inode), 0,
+ SLAB_HWCACHE_ALIGN, NULL, NULL);
+ if(!inode_cachep)
+ panic("VFS: Cannot create inode SLAB cache!");
+#endif
}

/* This belongs in file_table.c, not here... */
--- linux-2.1.57/mm/vmscan.c.old Fri Sep 26 08:10:52 1997
+++ linux-2.1.57/mm/vmscan.c Sat Sep 27 09:57:52 1997
@@ -24,12 +24,10 @@
#include <linux/smp_lock.h>
#include <linux/slab.h>

-#include <asm/dma.h>
-#include <asm/system.h> /* for cli()/sti() */
-#include <asm/uaccess.h> /* for copy_to/from_user */
#include <asm/bitops.h>
#include <asm/pgtable.h>

+extern void shrink_inodes(void); /* move to fs.h */
/*
* When are we next due for a page scan?
*/
@@ -356,6 +354,7 @@
int stop;

/* Always trim SLAB caches when memory gets low. */
+ shrink_inodes();
(void) kmem_cache_reap(0, dma, wait);

/* we don't try as hard if we're not waiting.. */

--------------E35572AF69BF2F3F9DEFDAA0--