[PATCH] Lazy discard ext3 reservation window patch

From: Mingming Cao
Date: Wed Apr 21 2004 - 18:29:20 EST


Andrew,

This patch contains several changes against the ext3 reservation code in
the 2.6.5-mm6 tree:

Lazy Discard Reservation Window:
This patch implements lazy discard: keep the old reservation window
temporarily until we find the new reservation window, and only do the
remove/add if the new reservation window is located at a different
position in the list than the old one. (The reservation code in the mm6
tree discards the old one first, then searches for the new one.) Two
reasons:
- If ext3_find_goal() does a good job, the reservation windows on the
list should not be very close to each other. So an inode's new
reservation window is likely to be located just next to its old one; its
position in the whole list is unchanged, so there is no need to remove
the old window and then add the new one to the list in the same
location. Just update the start block and end block.
- If we fail to find a new reservation in the goal group and move the
search on to the next group, keeping the old reservation around
temporarily allows us to search the list directly after the old
window. Otherwise we lose track of where we were and have to start from
the beginning of the list. Eventually the old window will be discarded
when we find a new one.

Other changes:
- Add a check to enforce the maximum when dynamically increasing the
window size.
- ext3_discard_reservation() should not be called on every iput(). It
has been moved to ext3_delete_inode(), so it is only called on the last
iput() if i_nlink is 0.
- Remove #ifdef EXT3_RESERVATION, since we made reservation a mount
option.
- Only allow an application to modify a file's reservation window size
when the fs is mounted with reservation and the operation is performed
on a regular file.

This patch should apply to 2.6.5-mm6. I have tested it with many dd
tests, untar tests, dbench and tiobench.

Thanks!

Mingming
diff -urNP -X dontdiff 265-mm6-regression-fix/fs/ext3/balloc.c 265-mm6-fix2/fs/ext3/balloc.c
--- 265-mm6-regression-fix/fs/ext3/balloc.c 2004-04-21 18:35:21.916666616 -0700
+++ 265-mm6-fix2/fs/ext3/balloc.c 2004-04-21 18:28:47.621608576 -0700
@@ -723,7 +723,7 @@
start_block = goal + group_first_block;

size = atomic_read(&my_rsv->rsv_goal_size);
- /* if we have a old reservation, discard it first */
+ /* if we have a old reservation, start the search from the old rsv */
if (!rsv_is_empty(my_rsv)) {
/*
* if the old reservation is cross group boundary
@@ -745,8 +745,7 @@
/* remember where we are before we discard the old one */
if (my_rsv->rsv_end + 1 > start_block)
start_block = my_rsv->rsv_end + 1;
- search_head = list_entry(my_rsv->rsv_list.prev,
- struct reserve_window, rsv_list);
+ search_head = my_rsv;
if ((my_rsv->rsv_alloc_hit > (my_rsv->rsv_end - my_rsv->rsv_start + 1) / 2)) {
/*
* if we previously allocation hit ration is greater than half
@@ -754,9 +753,10 @@
* otherwise keep the same
*/
size = size * 2;
+ if (size > EXT3_MAX_RESERVE_BLOCKS)
+ size = EXT3_MAX_RESERVE_BLOCKS;
atomic_set(&my_rsv->rsv_goal_size, size);
}
- rsv_window_remove(my_rsv);
}
else {
/*
@@ -823,9 +823,17 @@
found_rsv_window:
/*
* great! the reservable space contains some free blocks.
- * Insert it to the list.
- */
- rsv_window_add(my_rsv, prev_rsv);
+ *
+ * if the search returns that we should add the new
+ * window just next to where the old window, we don't
+ * need to remove the old window first then add it to the
+ * same place, just update the new start and new end.
+ */
+ if (my_rsv != prev_rsv) {
+ if (!rsv_is_empty(my_rsv))
+ rsv_window_remove(my_rsv);
+ rsv_window_add(my_rsv, prev_rsv);
+ }
my_rsv->rsv_start = reservable_space_start;
my_rsv->rsv_end = my_rsv->rsv_start + size - 1;
return 0; /* succeed */
@@ -927,6 +935,10 @@
if (!goal_in_my_reservation(my_rsv, goal, group, sb))
goal = -1;
}
+ if ((my_rsv->rsv_start >= group_first_block + EXT3_BLOCKS_PER_GROUP(sb))
+ || (my_rsv->rsv_end < group_first_block))
+ BUG();
+
ret = ext3_try_to_allocate(sb, handle, group, bitmap_bh, goal,
my_rsv);
if (ret >= 0)
@@ -996,10 +1008,10 @@
sbi = EXT3_SB(sb);
es = EXT3_SB(sb)->s_es;
ext3_debug("goal=%lu.\n", goal);
-#ifdef EXT3_RESERVATION
+
if (test_opt(sb, RESERVATION) && S_ISREG(inode->i_mode))
my_rsv = &EXT3_I(inode)->i_rsv_window;
-#endif
+
free_blocks = percpu_counter_read_positive(&sbi->s_freeblocks_counter);
root_blocks = le32_to_cpu(es->s_r_blocks_count);
if (free_blocks < root_blocks + 1 && !capable(CAP_SYS_RESOURCE) &&
@@ -1066,7 +1078,6 @@
if (ret_block >= 0)
goto allocated;
}
-#ifdef EXT3_RESERVATION
/*
* We may end up a bogus ealier ENOSPC error due to
* filesystem is "full" of reservations, but
@@ -1075,17 +1086,11 @@
* just do block allocation as without reservations.
*/
if (my_rsv) {
-#ifdef EXT3_RESERVATION_DEBUG
- printk("filesystem is fully reserved. Actual free blocks: %d. "
- "Try to do allocation without reservation, goal_group "
- "is %d\n",
- free_blocks, goal_group);
-#endif
my_rsv = NULL;
group_no = goal_group;
goto retry;
}
-#endif
+
/* No space left on the device */
*errp = -ENOSPC;
goto out;
diff -urNP -X dontdiff 265-mm6-regression-fix/fs/ext3/inode.c 265-mm6-fix2/fs/ext3/inode.c
--- 265-mm6-regression-fix/fs/ext3/inode.c 2004-04-21 18:34:45.393219024 -0700
+++ 265-mm6-fix2/fs/ext3/inode.c 2004-04-21 18:21:35.747263456 -0700
@@ -177,19 +177,6 @@
}

/*
- * Called at each iput()
- *
- * The inode may be "bad" if ext3_read_inode() saw an error from
- * ext3_get_inode(), so we need to check that to avoid freeing random disk
- * blocks.
- */
-void ext3_put_inode(struct inode *inode)
-{
- if (!is_bad_inode(inode))
- ext3_discard_reservation(inode);
-}
-
-/*
* Called at the last iput() if i_nlink is zero.
*/
void ext3_delete_inode (struct inode * inode)
@@ -199,6 +186,9 @@
if (is_bad_inode(inode))
goto no_delete;

+ /* discard the block reservation */
+ ext3_discard_reservation(inode);
+
handle = start_transaction(inode);
if (IS_ERR(handle)) {
/* If we're going to skip the normal cleanup, we still
diff -urNP -X dontdiff 265-mm6-regression-fix/fs/ext3/ioctl.c 265-mm6-fix2/fs/ext3/ioctl.c
--- 265-mm6-regression-fix/fs/ext3/ioctl.c 2004-04-21 18:34:45.389219632 -0700
+++ 265-mm6-fix2/fs/ext3/ioctl.c 2004-04-21 18:22:28.196289992 -0700
@@ -152,11 +152,16 @@
return ret;
}
#endif
-#ifdef EXT3_RESERVATION
case EXT3_IOC_GETRSVSZ:
- rsv_window_size = atomic_read(&ei->i_rsv_window.rsv_goal_size);
- return put_user(rsv_window_size, (int *)arg);
+ if (test_opt(inode->i_sb, RESERVATION) && S_ISREG(inode->i_mode)) {
+ rsv_window_size = atomic_read(&ei->i_rsv_window.rsv_goal_size);
+ return put_user(rsv_window_size, (int *)arg);
+ }
+ return -ENOTTY;
case EXT3_IOC_SETRSVSZ:
+ if (!test_opt(inode->i_sb, RESERVATION) ||!S_ISREG(inode->i_mode))
+ return -ENOTTY;
+
if (IS_RDONLY(inode))
return -EROFS;

@@ -170,7 +175,6 @@
rsv_window_size = EXT3_MAX_RESERVE_BLOCKS;
atomic_set(&ei->i_rsv_window.rsv_goal_size, rsv_window_size);
return 0;
-#endif
default:
return -ENOTTY;
}
diff -urNP -X dontdiff 265-mm6-regression-fix/fs/ext3/super.c 265-mm6-fix2/fs/ext3/super.c
--- 265-mm6-regression-fix/fs/ext3/super.c 2004-04-21 18:34:45.394218872 -0700
+++ 265-mm6-fix2/fs/ext3/super.c 2004-04-21 18:21:35.755262240 -0700
@@ -551,7 +551,6 @@
.read_inode = ext3_read_inode,
.write_inode = ext3_write_inode,
.dirty_inode = ext3_dirty_inode,
- .put_inode = ext3_put_inode,
.delete_inode = ext3_delete_inode,
.put_super = ext3_put_super,
.write_super = ext3_write_super,
@@ -760,19 +759,12 @@
printk("EXT3 (no)acl options not supported\n");
break;
#endif
-#ifdef EXT3_RESERVATION
case Opt_reservation:
set_opt(sbi->s_mount_opt, RESERVATION);
break;
case Opt_noreservation:
clear_opt(sbi->s_mount_opt, RESERVATION);
break;
-#else
- case Opt_reservation:
- case Opt_noreservation:
- printk("EXT3 block reservation options not supported\n");
- break;
-#endif
case Opt_journal_update:
/* @@@ FIXME */
/* Eventually we will want to be able to create
diff -urNP -X dontdiff 265-mm6-regression-fix/include/linux/ext3_fs.h 265-mm6-fix2/include/linux/ext3_fs.h
--- 265-mm6-regression-fix/include/linux/ext3_fs.h 2004-04-21 18:34:43.546499768 -0700
+++ 265-mm6-fix2/include/linux/ext3_fs.h 2004-04-21 18:21:35.769260112 -0700
@@ -35,7 +35,6 @@
/*
* Define EXT3_RESERVATION to reserve data blocks for expanding files
*/
-#define EXT3_RESERVATION
#define EXT3_DEFAULT_RESERVE_BLOCKS 8
#define EXT3_MAX_RESERVE_BLOCKS 1024
/*
@@ -208,10 +207,8 @@
#ifdef CONFIG_JBD_DEBUG
#define EXT3_IOC_WAIT_FOR_READONLY _IOR('f', 99, long)
#endif
-#ifdef EXT3_RESERVATION
#define EXT3_IOC_GETRSVSZ _IOR('r', 1, long)
#define EXT3_IOC_SETRSVSZ _IOW('r', 2, long)
-#endif

/*
* Structure of an inode on the disk