Re: [patch] set_blocksize/invalidate_buffers race fixes for 2.2.12-final

Andrea Arcangeli (andrea@suse.de)
Tue, 24 Aug 1999 21:05:37 +0200 (CEST)


On Tue, 24 Aug 1999, Ingo Molnar wrote:

>no, it's just that the RAID code has been around during all those buffer.c
>rewrites, and it tried to be very careful and overparanoid. It might not
>be necessery anymore.

Need a cleanup.

>this is an old comment, just ignore it. (there is no bforget in
>cache_drop_behind anymore)

Why don't you use bforget? Why duplicating code?

>in RAID there can be a set_blocksize() called while the device has active
>bhs. This never happens with a 'simple device'. I know it is hacky, but
>the 'users' of the bhs that get their device's size changed under them are
>also aware of this and thus the thing is safe.

set_blocksize can't change the size of the buffers, it can _only_
invalidate the buffer contents. So if b_dev and b_size change under you
(and you are safe w.r.t. SMP) then you must panic.

Anyway the raid code is been removed from final3 so here it is my
race-fixes against 2.2.12-final3:

--- /tmp/linux-2.2.10/fs/buffer.c Tue Jul 13 00:33:14 1999
+++ linux-2.2.10/fs/buffer.c Sat Jul 17 13:08:38 1999
@@ -395,31 +395,6 @@
return err;
}

-void invalidate_buffers(kdev_t dev)
-{
- int i;
- int nlist;
- struct buffer_head * bh;
-
- for(nlist = 0; nlist < NR_LIST; nlist++) {
- bh = lru_list[nlist];
- for (i = nr_buffers_type[nlist]*2 ; --i > 0 ; bh = bh->b_next_free) {
- if (bh->b_dev != dev)
- continue;
- wait_on_buffer(bh);
- if (bh->b_dev != dev)
- continue;
- if (bh->b_count)
- continue;
- bh->b_flushtime = 0;
- clear_bit(BH_Protected, &bh->b_state);
- clear_bit(BH_Uptodate, &bh->b_state);
- clear_bit(BH_Dirty, &bh->b_state);
- clear_bit(BH_Req, &bh->b_state);
- }
- }
-}
-
#define _hashfn(dev,block) (((unsigned)(HASHDEV(dev)^block)) & bh_hash_mask)
#define hash(dev,block) hash_table[_hashfn(dev,block)]

@@ -486,11 +461,14 @@
remove_from_lru_list(bh);
}

-static inline void put_last_free(struct buffer_head * bh)
+static void put_last_free(struct buffer_head * bh)
{
if (bh) {
struct buffer_head **bhp = &free_list[BUFSIZE_INDEX(bh->b_size)];

+ bh->b_count = 0;
+ bh->b_state = 0;
+ remove_from_queues(bh);
bh->b_dev = B_FREE; /* So it is obvious we are on the free list. */

/* Add to back of free list. */
@@ -510,7 +488,7 @@
{
/* put at end of free list */
if(bh->b_dev == B_FREE) {
- put_last_free(bh);
+ panic("B_FREE inserted into queues");
} else {
struct buffer_head **bhp = &lru_list[bh->b_list];

@@ -600,10 +578,46 @@
return 0;
}

+void invalidate_buffers(kdev_t dev)
+{
+ int i, nlist, slept;
+ struct buffer_head * bh, * bhnext;
+
+ again:
+ slept = 0;
+ for(nlist = 0; nlist < NR_LIST; nlist++) {
+ bh = lru_list[nlist];
+ if (!bh)
+ continue;
+ for (i = nr_buffers_type[nlist] ; i > 0 ;
+ bh = bhnext, i--)
+ {
+ bhnext = bh->b_next_free;
+ if (bh->b_dev != dev)
+ continue;
+ if (buffer_locked(bh))
+ {
+ slept = 1;
+ wait_on_buffer(bh);
+ }
+ if (bh->b_dev != dev)
+ goto panic_changed;
+ if (!bh->b_count)
+ put_last_free(bh);
+ if (slept)
+ goto again;
+ }
+ }
+ return;
+
+ panic_changed:
+ panic("invalidate_buffers: buffer changed under us");
+}
+
void set_blocksize(kdev_t dev, int size)
{
extern int *blksize_size[];
- int i, nlist;
+ int i, nlist, slept;
struct buffer_head * bh, *bhnext;

if (!blksize_size[MAJOR(dev)])
@@ -625,29 +639,41 @@
/* We need to be quite careful how we do this - we are moving entries
* around on the free list, and we can get in a loop if we are not careful.
*/
- for(nlist = 0; nlist < NR_LIST; nlist++) {
+ again:
+ slept = 0;
+ for(nlist = 0; nlist < NR_LIST; nlist++) {
bh = lru_list[nlist];
- for (i = nr_buffers_type[nlist]*2 ; --i > 0 ; bh = bhnext) {
- if(!bh)
- break;
-
- bhnext = bh->b_next_free;
- if (bh->b_dev != dev)
- continue;
- if (bh->b_size == size)
+ if (!bh)
+ continue;
+ for (i = nr_buffers_type[nlist] ; i > 0 ;
+ bh = bhnext, i--)
+ {
+ bhnext = bh->b_next_free;
+ if (bh->b_dev != dev || bh->b_size == size)
continue;
- bhnext->b_count++;
- wait_on_buffer(bh);
- bhnext->b_count--;
- if (bh->b_dev == dev && bh->b_size != size) {
- clear_bit(BH_Dirty, &bh->b_state);
- clear_bit(BH_Uptodate, &bh->b_state);
- clear_bit(BH_Req, &bh->b_state);
- bh->b_flushtime = 0;
+ if (buffer_locked(bh))
+ {
+ slept = 1;
+ wait_on_buffer(bh);
}
- remove_from_hash_queue(bh);
+ if (bh->b_dev != dev || bh->b_size == size)
+ goto panic_changed;
+ if (!bh->b_count)
+ put_last_free(bh);
+ else
+ printk(KERN_ERR
+ "set_blocksize: "
+ "b_count %d, dev %s, block %lu!\n",
+ bh->b_count, bdevname(bh->b_dev),
+ bh->b_blocknr);
+ if (slept)
+ goto again;
}
}
+ return;
+
+ panic_changed:
+ panic("set_blocksize: buffer changed under us");
}

/*
@@ -825,9 +851,6 @@
__brelse(buf);
return;
}
- buf->b_count = 0;
- buf->b_state = 0;
- remove_from_queues(buf);
put_last_free(buf);
}

Andrea

-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@vger.rutgers.edu
Please read the FAQ at http://www.tux.org/lkml/