patch for fs/buffer.c get_unused_buffer_head

Bill Hawes (whawes@star.net)
Fri, 25 Jul 1997 22:49:22 -0400


I've been doing some work on fs/buffer.c lately and want to release some
patches for comments and testing. To make it easier to look over, I've
split the changes into three parts.

This first patch for get_unused_buffer_head effectively solves the
long-standing low-memory no-buffer-head deadlock. It works by
maintaining a special reserve list for async buffer head requests. Such
requests are never allowed to fail, and when the async IO completes, the
returning buffer heads are placed back on the reserve list. This
guarantees that the task sleeping for buffer heads will eventually wake
up.
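
Since the allocation logic moves around a bit in the diff, here is how the
new get_unused_buffer_head() reads after the patch (comments trimmed):

static struct buffer_head * get_unused_buffer_head(int async)
{
        struct buffer_head * bh;

repeat:
        recover_reusable_buffer_heads();
        if (unused_list) {              /* common case: free list has a head */
                bh = unused_list;
                unused_list = bh->b_next_free;
                nr_unused_buffer_heads--;
                return bh;
        }

        /* Can't swap to get more heads; the swap-out may need them itself. */
        if ((bh = kmem_cache_alloc(bh_cachep, SLAB_ATOMIC)) != NULL) {
                memset(bh, 0, sizeof(*bh));
                nr_buffer_heads++;
                return bh;
        }

        if (!async)                     /* ordinary buffers can handle failure */
                return NULL;

        if (reserved_list) {            /* async: fall back on the reserve */
                bh = reserved_list;
                reserved_list = bh->b_next_free;
                nr_reserved_bh--;
                return bh;
        }

        /* Reserve empty, so async heads are in flight; wait for them to return. */
        run_task_queue(&tq_disk);
        sleep_on(&buffer_wait);
        goto repeat;
}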

In contrast, buffer head requests for ordinary buffers are allowed to
fail, since refill_freelist already knows how to handle failure. This
keeps ordinary buffer requests from competing with async requests, so
paging can continue even under extreme memory pressure.
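
The two call sites select the policy through the new create_buffers()
argument; only the page I/O path (brw_page) asks for async heads:

        /* brw_page(): async heads for page I/O, must not fail */
        bh = create_buffers(page_address(page), size, 1);

        /* grow_buffers(): ordinary heads, may come back NULL */
        bh = create_buffers(page, size, 0);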

Using this patch I've been able to boot Linux and compile net-tools with
a total of only 100 buffer heads (including the 12 on the reserve list).
It wasn't fast, but it didn't crash or deadlock either. (I also
accidentally fsck'ed a 2G disk with only 200 buffer heads ...)

Supporting the reserve list adds virtually no overhead: the only code
added to the frequent path is a single test with a jump that is normally
not taken. I've also streamlined the flow so that the most common case
runs straight through without any jumps.
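
For reference, here is the reworked put_unused_buffer_head(); the reserve
handling sits past the common return so it stays off the frequent path
(comments trimmed):

static void put_unused_buffer_head(struct buffer_head * bh)
{
        struct buffer_head **list = &unused_list;

        if (nr_reserved_bh < NR_RESERVE)        /* the one test on the hot path */
                goto put_reserve;

        if (nr_unused_buffer_heads >= MAX_UNUSED_BUFFERS) {
                nr_buffer_heads--;
                kmem_cache_free(bh_cachep, bh);
                return;
        }

        nr_unused_buffer_heads++;

put_buffer:
        memset(bh, 0, sizeof(*bh));
        bh->b_next_free = *list;
        *list = bh;
        if (!waitqueue_active(&buffer_wait))
                return;
        wake_up(&buffer_wait);
        return;

put_reserve:                            /* off the common path */
        list = &reserved_list;
        nr_reserved_bh++;
        goto put_buffer;
}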

The patch is against 2.1.47, and I'll back-port it to 2.0.31 if there's
interest.

Regards,
Bill
[Attachment: buffer_gub47-patch]

--- fs/buffer.c.old Sat Jul 19 08:17:10 1997
+++ fs/buffer.c Fri Jul 25 21:39:04 1997
@@ -64,6 +64,8 @@

static kmem_cache_t *bh_cachep;

+#define NR_RESERVE 12
+static struct buffer_head * reserved_list = NULL;
static struct buffer_head * unused_list = NULL;
static struct buffer_head * reuse_list = NULL;
static struct wait_queue * buffer_wait = NULL;
@@ -72,6 +74,7 @@
static int nr_buffers_type[NR_LIST] = {0,};
static int nr_buffer_heads = 0;
static int nr_unused_buffer_heads = 0;
+static int nr_reserved_bh = 0;
static int refilled = 0; /* Set NZ when a buffer freelist is refilled
this is used by the loop device */

@@ -1022,44 +1053,41 @@
return NULL;
}

+/*
+ * WSH 07/25/97: Added reserve list for async buffer heads.
+ */
static void put_unused_buffer_head(struct buffer_head * bh)
{
+ struct buffer_head **list = &unused_list;
+
+ if (nr_reserved_bh < NR_RESERVE)
+ goto put_reserve;
+
if (nr_unused_buffer_heads >= MAX_UNUSED_BUFFERS) {
nr_buffer_heads--;
kmem_cache_free(bh_cachep, bh);
return;
}

- memset(bh,0,sizeof(*bh));
nr_unused_buffer_heads++;
- bh->b_next_free = unused_list;
- unused_list = bh;
- wake_up(&buffer_wait);
-}
-
-static void get_more_buffer_heads(void)
-{
- struct buffer_head * bh;

- while (!unused_list) {
- /* This is critical. We can't swap out pages to get
- * more buffer heads, because the swap-out may need
- * more buffer-heads itself. Thus SLAB_ATOMIC.
- */
- if((bh = kmem_cache_alloc(bh_cachep, SLAB_ATOMIC)) != NULL) {
- put_unused_buffer_head(bh);
- nr_buffer_heads++;
- return;
- }
-
- /* Uhhuh. We're _really_ low on memory. Now we just
- * wait for old buffer heads to become free due to
- * finishing IO..
- */
- run_task_queue(&tq_disk);
- sleep_on(&buffer_wait);
- }
+put_buffer:
+ memset(bh,0,sizeof(*bh));
+ bh->b_next_free = *list;
+ *list = bh;
+ if (!waitqueue_active(&buffer_wait))
+ return;
+ wake_up(&buffer_wait);
+ return;

+ /*
+ * Put the returning buffer on the reserved list. This won't be
+ * called very often, so it's off the common path.
+ */
+put_reserve:
+ list = &reserved_list;
+ nr_reserved_bh++;
+ goto put_buffer;
}

/*
@@ -1083,18 +1111,58 @@
}
}

-static struct buffer_head * get_unused_buffer_head(void)
+/*
+ * WSH 07/24/97: Restructured to remove jumps from the common path.
+ * Check the reserve list for async buffer heads, and sleep only for
+ * async buffer heads to avoid low-memory deadlocks.
+ */
+static struct buffer_head * get_unused_buffer_head(int async)
{
struct buffer_head * bh;

+repeat:
recover_reusable_buffer_heads();
- get_more_buffer_heads();
- if (!unused_list)
+ if (unused_list) {
+ bh = unused_list;
+ unused_list = bh->b_next_free;
+ nr_unused_buffer_heads--;
+ return bh;
+ }
+
+ /* This is critical. We can't swap out pages to get
+ * more buffer heads, because the swap-out may need
+ * more buffer-heads itself. Thus SLAB_ATOMIC.
+ */
+ if((bh = kmem_cache_alloc(bh_cachep, SLAB_ATOMIC)) != NULL) {
+ memset(bh, 0, sizeof(*bh));
+ nr_buffer_heads++;
+ return bh;
+ }
+
+ /*
+ * Allocations for ordinary buffers can handle failure ...
+ */
+ if (!async)
return NULL;
- bh = unused_list;
- unused_list = bh->b_next_free;
- nr_unused_buffer_heads--;
- return bh;
+
+ /*
+ * We need an async buffer, so check the special reserve list.
+ */
+ if (reserved_list) {
+ bh = reserved_list;
+ reserved_list = bh->b_next_free;
+ nr_reserved_bh--;
+ return bh;
+ }
+
+ /* Uhhuh. We're _really_ low on memory. Now we just
+ * wait for old buffer heads to become free due to
+ * finishing IO. Since the reserve list is empty,
+ * we're sure there are async buffer heads in use.
+ */
+ run_task_queue(&tq_disk);
+ sleep_on(&buffer_wait);
+ goto repeat;
}

/*
@@ -1103,7 +1171,8 @@
* follow the buffers created. Return NULL if unable to create more
* buffers.
*/
-static struct buffer_head * create_buffers(unsigned long page, unsigned long size)
+static struct buffer_head * create_buffers(unsigned long page,
+ unsigned long size, int async)
{
struct buffer_head *bh, *head;
long offset;
@@ -1111,7 +1180,7 @@
head = NULL;
offset = PAGE_SIZE;
while ((offset -= size) >= 0) {
- bh = get_unused_buffer_head();
+ bh = get_unused_buffer_head(async);
if (!bh)
goto no_grow;

@@ -1189,12 +1283,13 @@
clear_bit(PG_uptodate, &page->flags);
clear_bit(PG_error, &page->flags);
/*
- * Allocate buffer heads pointing to this page, just for I/O.
+ * Allocate async buffer heads pointing to this page, just for I/O.
* They do _not_ show up in the buffer hash table!
* They are _not_ registered in page->buffers either!
*/
- bh = create_buffers(page_address(page), size);
+ bh = create_buffers(page_address(page), size, 1);
if (!bh) {
+ /* WSH: exit here leaves page->count incremented */
clear_bit(PG_locked, &page->flags);
wake_up(&page->wait);
return -ENOMEM;
@@ -1405,16 +1514,15 @@
return 0;
}

- isize = BUFSIZE_INDEX(size);
-
if (!(page = __get_free_page(pri)))
return 0;
- bh = create_buffers(page, size);
+ bh = create_buffers(page, size, 0);
if (!bh) {
free_page(page);
return 0;
}

+ isize = BUFSIZE_INDEX(size);
insert_point = free_list[isize];

tmp = bh;
@@ -1554,6 +1660,18 @@
SLAB_HWCACHE_ALIGN, NULL, NULL);
if(!bh_cachep)
panic("Cannot create buffer head SLAB cache\n");
+ /*
+ * Fill the reserve list.
+ */
+ while (nr_buffer_heads < NR_RESERVE) {
+ struct buffer_head * bh;
+
+ bh = kmem_cache_alloc(bh_cachep, SLAB_ATOMIC);
+ if (!bh)
+ break;
+ put_unused_buffer_head(bh);
+ nr_buffer_heads++;
+ }

lru_list[BUF_CLEAN] = 0;
grow_buffers(GFP_KERNEL, BLOCK_SIZE);
