Re: [PATCH] distributed counters for ext2 to avoid group scanning

From: Alex Tomas (bzzz@tmi.comex.ru)
Date: Mon Mar 17 2003 - 05:41:41 EST


>>>>> William Lee Irwin (WLI) writes:

 WLI> On Sun, Mar 16, 2003 at 06:01:55PM +0300, Alex Tomas wrote:
>> ext2 with concurrent balloc/ialloc doesn't maintain global free
>> inodes/blocks counters. this is due to the badness of spinlocks and
>> atomic_t from big iron's viewpoint. therefore, to know these
>> values we have to scan all group descriptors; there are 81 groups
>> for a 10G fs. I believe there is a method to avoid the scanning and
>> decrease the memory footprint.

 WLI> benching now

here is the patch against virgin 2.5.64 containing:
1) concurrent balloc
2) concurrent ialloc
3) no-space fix
4) distributed counters for free blocks, free inodes and dirs (sketched below)
5) LOTS of Andrew Morton's corrections
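
In a nutshell: lock_super() is gone from the allocation paths. Every block
group gets its own pair of spinlocks plus its own slice of the reserved-blocks
pool, and the three fs-wide counts (free blocks, free inodes, used dirs) move
into per-CPU distributed counters. A simplified sketch of the new in-core
state (the real definitions are in the ext2_fs_sb.h and dcounter.h hunks at
the end of the patch):

	/* per-group bookkeeping, one cacheline per group to avoid ping-pong */
	struct ext2_bg_info {
		u8		debts;		/* was sbi->s_debts[group] */
		spinlock_t	balloc_lock;	/* protects this group's block counts */
		spinlock_t	ialloc_lock;	/* protects this group's inode counts */
		unsigned int	reserved;	/* group's share of s_r_blocks_count */
	} ____cacheline_aligned_in_smp;

	/* new fields in struct ext2_sb_info */
	struct ext2_bg_info	*s_bgi;		/* one entry per block group */
	struct dcounter		free_blocks_dc;	/* in-core stand-in for es->s_free_blocks_count */
	struct dcounter		free_inodes_dc;	/* in-core stand-in for es->s_free_inodes_count */
	struct dcounter		dirs_dc;	/* takes over from sbi->s_dir_count */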

diff -uNr linux-2.5.64/fs/ext2/balloc.c linux-2.5.64-ciba/fs/ext2/balloc.c
--- linux-2.5.64/fs/ext2/balloc.c Thu Feb 20 16:18:53 2003
+++ linux-2.5.64-ciba/fs/ext2/balloc.c Mon Mar 17 13:26:05 2003
@@ -94,69 +94,71 @@
         return bh;
 }
 
-static inline int reserve_blocks(struct super_block *sb, int count)
+/*
+ * Set sb->s_dirt here because the superblock was "logically" altered. We
+ * need to recalculate its free blocks count and flush it out.
+ */
+static int
+group_reserve_blocks(struct super_block *sb, struct ext2_bg_info *bgi,
+ struct ext2_group_desc *desc, struct buffer_head *bh,
+ int count, int use_reserve)
 {
- struct ext2_sb_info * sbi = EXT2_SB(sb);
- struct ext2_super_block * es = sbi->s_es;
- unsigned free_blocks = le32_to_cpu(es->s_free_blocks_count);
- unsigned root_blocks = le32_to_cpu(es->s_r_blocks_count);
+ struct ext2_sb_info *sbi = EXT2_SB(sb);
+ unsigned free_blocks;
+ unsigned root_blocks;
+
+ spin_lock(&bgi->balloc_lock);
 
+ free_blocks = le16_to_cpu(desc->bg_free_blocks_count);
         if (free_blocks < count)
                 count = free_blocks;
+ root_blocks = bgi->reserved;
+
+ if (free_blocks < bgi->reserved && !use_reserve) {
+ /* don't use reserved blocks */
+ spin_unlock(&bgi->balloc_lock);
+ return 0;
+ }
 
- if (free_blocks < root_blocks + count && !capable(CAP_SYS_RESOURCE) &&
- sbi->s_resuid != current->fsuid &&
- (sbi->s_resgid == 0 || !in_group_p (sbi->s_resgid))) {
+ if (free_blocks < bgi->reserved + count &&
+ !capable(CAP_SYS_RESOURCE) &&
+ sbi->s_resuid != current->fsuid &&
+ (sbi->s_resgid == 0 || !in_group_p (sbi->s_resgid))) {
                 /*
                  * We are too close to reserve and we are not privileged.
                  * Can we allocate anything at all?
                  */
- if (free_blocks > root_blocks)
- count = free_blocks - root_blocks;
- else
+ if (free_blocks > bgi->reserved) {
+ count = free_blocks - bgi->reserved;
+ } else {
+ spin_unlock(&bgi->balloc_lock);
                         return 0;
+ }
         }
+ desc->bg_free_blocks_count = cpu_to_le16(free_blocks - count);
 
- es->s_free_blocks_count = cpu_to_le32(free_blocks - count);
- mark_buffer_dirty(sbi->s_sbh);
+ spin_unlock(&bgi->balloc_lock);
+ dcounter_add(&EXT2_SB(sb)->free_blocks_dc, -count);
         sb->s_dirt = 1;
+ mark_buffer_dirty(bh);
         return count;
 }
 
-static inline void release_blocks(struct super_block *sb, int count)
+static void group_release_blocks(struct super_block *sb,
+ struct ext2_bg_info *bgi, struct ext2_group_desc *desc,
+ struct buffer_head *bh, int count)
 {
         if (count) {
- struct ext2_sb_info * sbi = EXT2_SB(sb);
- struct ext2_super_block * es = sbi->s_es;
- unsigned free_blocks = le32_to_cpu(es->s_free_blocks_count);
- es->s_free_blocks_count = cpu_to_le32(free_blocks + count);
- mark_buffer_dirty(sbi->s_sbh);
- sb->s_dirt = 1;
- }
-}
-
-static inline int group_reserve_blocks(struct ext2_group_desc *desc,
- struct buffer_head *bh, int count)
-{
- unsigned free_blocks;
+ unsigned free_blocks;
 
- if (!desc->bg_free_blocks_count)
- return 0;
-
- free_blocks = le16_to_cpu(desc->bg_free_blocks_count);
- if (free_blocks < count)
- count = free_blocks;
- desc->bg_free_blocks_count = cpu_to_le16(free_blocks - count);
- mark_buffer_dirty(bh);
- return count;
-}
+ spin_lock(&bgi->balloc_lock);
 
-static inline void group_release_blocks(struct ext2_group_desc *desc,
- struct buffer_head *bh, int count)
-{
- if (count) {
- unsigned free_blocks = le16_to_cpu(desc->bg_free_blocks_count);
+ free_blocks = le16_to_cpu(desc->bg_free_blocks_count);
                 desc->bg_free_blocks_count = cpu_to_le16(free_blocks + count);
+
+ spin_unlock(&bgi->balloc_lock);
+ dcounter_add(&EXT2_SB(sb)->free_blocks_dc, count);
+ sb->s_dirt = 1;
                 mark_buffer_dirty(bh);
         }
 }
@@ -172,12 +174,11 @@
         unsigned long i;
         unsigned long overflow;
         struct super_block * sb = inode->i_sb;
+ struct ext2_sb_info * sbi = EXT2_SB(sb);
         struct ext2_group_desc * desc;
- struct ext2_super_block * es;
+ struct ext2_super_block * es = sbi->s_es;
         unsigned freed = 0, group_freed;
 
- lock_super (sb);
- es = EXT2_SB(sb)->s_es;
         if (block < le32_to_cpu(es->s_first_data_block) ||
             block + count < block ||
             block + count > le32_to_cpu(es->s_blocks_count)) {
@@ -215,16 +216,17 @@
         if (in_range (le32_to_cpu(desc->bg_block_bitmap), block, count) ||
             in_range (le32_to_cpu(desc->bg_inode_bitmap), block, count) ||
             in_range (block, le32_to_cpu(desc->bg_inode_table),
- EXT2_SB(sb)->s_itb_per_group) ||
+ sbi->s_itb_per_group) ||
             in_range (block + count - 1, le32_to_cpu(desc->bg_inode_table),
- EXT2_SB(sb)->s_itb_per_group))
+ sbi->s_itb_per_group))
                 ext2_error (sb, "ext2_free_blocks",
                             "Freeing blocks in system zones - "
                             "Block = %lu, count = %lu",
                             block, count);
 
         for (i = 0, group_freed = 0; i < count; i++) {
- if (!ext2_clear_bit(bit + i, bitmap_bh->b_data))
+ if (!ext2_clear_bit_atomic(&sbi->s_bgi[block_group].balloc_lock,
+ bit + i, (void *) bitmap_bh->b_data))
                         ext2_error (sb, "ext2_free_blocks",
                                       "bit already cleared for block %lu",
                                       block + i);
@@ -236,7 +238,8 @@
         if (sb->s_flags & MS_SYNCHRONOUS)
                 sync_dirty_buffer(bitmap_bh);
 
- group_release_blocks(desc, bh2, group_freed);
+ group_release_blocks(sb, &sbi->s_bgi[block_group],
+ desc, bh2, group_freed);
         freed += group_freed;
 
         if (overflow) {
@@ -246,18 +249,18 @@
         }
 error_return:
         brelse(bitmap_bh);
- release_blocks(sb, freed);
- unlock_super (sb);
         DQUOT_FREE_BLOCK(inode, freed);
 }
 
-static int grab_block(char *map, unsigned size, int goal)
+static int grab_block(spinlock_t *lock, char *map, unsigned size, int goal)
 {
         int k;
         char *p, *r;
 
         if (!ext2_test_bit(goal, map))
                 goto got_it;
+
+repeat:
         if (goal) {
                 /*
                  * The goal was occupied; search forward for a free
@@ -297,7 +300,8 @@
         }
         return -1;
 got_it:
- ext2_set_bit(goal, map);
+ if (ext2_set_bit_atomic(lock, goal, (void *) map))
+ goto repeat;
         return goal;
 }
 
@@ -309,17 +313,17 @@
  * bitmap, and then for any free bit if that fails.
  * This function also updates quota and i_blocks field.
  */
-int ext2_new_block (struct inode * inode, unsigned long goal,
- u32 * prealloc_count, u32 * prealloc_block, int * err)
+int ext2_new_block(struct inode *inode, unsigned long goal,
+ u32 *prealloc_count, u32 *prealloc_block, int *err)
 {
         struct buffer_head *bitmap_bh = NULL;
         struct buffer_head *gdp_bh; /* bh2 */
         struct ext2_group_desc *desc;
         int group_no; /* i */
         int ret_block; /* j */
- int bit; /* k */
+ int bit; /* k */
         int target_block; /* tmp */
- int block = 0;
+ int block = 0, use_reserve = 0;
         struct super_block *sb = inode->i_sb;
         struct ext2_sb_info *sbi = EXT2_SB(sb);
         struct ext2_super_block *es = sbi->s_es;
@@ -341,14 +345,7 @@
                 prealloc_goal--;
 
         dq_alloc = prealloc_goal + 1;
-
- lock_super (sb);
-
- es_alloc = reserve_blocks(sb, dq_alloc);
- if (!es_alloc) {
- *err = -ENOSPC;
- goto out_unlock;
- }
+ es_alloc = dq_alloc;
 
         ext2_debug ("goal=%lu.\n", goal);
 
@@ -360,7 +357,8 @@
         if (!desc)
                 goto io_error;
 
- group_alloc = group_reserve_blocks(desc, gdp_bh, es_alloc);
+ group_alloc = group_reserve_blocks(sb, &sbi->s_bgi[group_no],
+ desc, gdp_bh, es_alloc, 0);
         if (group_alloc) {
                 ret_block = ((goal - le32_to_cpu(es->s_first_data_block)) %
                                         group_size);
@@ -371,11 +369,13 @@
                 
                 ext2_debug("goal is at %d:%d.\n", group_no, ret_block);
 
- ret_block = grab_block(bitmap_bh->b_data,
+ ret_block = grab_block(&sbi->s_bgi[group_no].balloc_lock,
+ bitmap_bh->b_data,
                                 group_size, ret_block);
                 if (ret_block >= 0)
                         goto got_block;
- group_release_blocks(desc, gdp_bh, group_alloc);
+ group_release_blocks(sb, &sbi->s_bgi[group_no],
+ desc, gdp_bh, group_alloc);
                 group_alloc = 0;
         }
 
@@ -385,6 +385,7 @@
          * Now search the rest of the groups. We assume that
          * i and desc correctly point to the last group visited.
          */
+repeat:
         for (bit = 0; !group_alloc &&
                         bit < sbi->s_groups_count; bit++) {
                 group_no++;
@@ -393,9 +394,18 @@
                 desc = ext2_get_group_desc(sb, group_no, &gdp_bh);
                 if (!desc)
                         goto io_error;
- group_alloc = group_reserve_blocks(desc, gdp_bh, es_alloc);
+ group_alloc = group_reserve_blocks(sb, &sbi->s_bgi[group_no],
+ desc, gdp_bh, es_alloc, use_reserve);
         }
- if (bit >= sbi->s_groups_count) {
+ if (!use_reserve) {
+ /* first time we did not try to allocate
+ * reserved blocks. now it looks like
+ * no more non-reserved blocks left. we
+ * will try to allocate reserved blocks -bzzz */
+ use_reserve = 1;
+ goto repeat;
+ }
+ if (!group_alloc) {
                 *err = -ENOSPC;
                 goto out_release;
         }
@@ -404,13 +414,11 @@
         if (!bitmap_bh)
                 goto io_error;
 
- ret_block = grab_block(bitmap_bh->b_data, group_size, 0);
+ ret_block = grab_block(&sbi->s_bgi[group_no].balloc_lock,
+ bitmap_bh->b_data, group_size, 0);
         if (ret_block < 0) {
- ext2_error (sb, "ext2_new_block",
- "Free blocks count corrupted for block group %d",
- group_no);
                 group_alloc = 0;
- goto io_error;
+ goto repeat;
         }
 
 got_block:
@@ -452,7 +460,9 @@
                 unsigned n;
 
                 for (n = 0; n < group_alloc && ++ret_block < group_size; n++) {
- if (ext2_set_bit(ret_block, bitmap_bh->b_data))
+ if (ext2_set_bit_atomic(&sbi->s_bgi[group_no].balloc_lock,
+ ret_block,
+ (void*) bitmap_bh->b_data))
                                  break;
                 }
                 *prealloc_block = block + 1;
@@ -471,10 +481,8 @@
 
         *err = 0;
 out_release:
- group_release_blocks(desc, gdp_bh, group_alloc);
- release_blocks(sb, es_alloc);
-out_unlock:
- unlock_super (sb);
+ group_release_blocks(sb, &sbi->s_bgi[group_no],
+ desc, gdp_bh, group_alloc);
         DQUOT_FREE_BLOCK(inode, dq_alloc);
 out:
         brelse(bitmap_bh);
@@ -485,13 +493,18 @@
         goto out_release;
 }
 
-unsigned long ext2_count_free_blocks (struct super_block * sb)
+unsigned long ext2_count_free_blocks(struct super_block *sb)
+{
+ return dcounter_value(&EXT2_SB(sb)->free_blocks_dc);
+}
+
+unsigned long ext2_count_free_blocks_old(struct super_block *sb)
 {
-#ifdef EXT2FS_DEBUG
- struct ext2_super_block * es;
- unsigned long desc_count, bitmap_count, x;
         struct ext2_group_desc * desc;
+ unsigned long desc_count = 0;
         int i;
+#ifdef EXT2FS_DEBUG
+ unsigned long bitmap_count, x;
         
         lock_super (sb);
         es = EXT2_SB(sb)->s_es;
@@ -519,13 +532,18 @@
         unlock_super (sb);
         return bitmap_count;
 #else
- return le32_to_cpu(EXT2_SB(sb)->s_es->s_free_blocks_count);
+ for (i = 0; i < EXT2_SB(sb)->s_groups_count; i++) {
+ desc = ext2_get_group_desc (sb, i, NULL);
+ if (!desc)
+ continue;
+ desc_count += le16_to_cpu(desc->bg_free_blocks_count);
+ }
+ return desc_count;
 #endif
 }
 
-static inline int block_in_use (unsigned long block,
- struct super_block * sb,
- unsigned char * map)
+static inline int
+block_in_use(unsigned long block, struct super_block *sb, unsigned char *map)
 {
         return ext2_test_bit ((block - le32_to_cpu(EXT2_SB(sb)->s_es->s_first_data_block)) %
                          EXT2_BLOCKS_PER_GROUP(sb), map);
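
The net effect on the block allocator: the bitmap search runs with no lock
held, and the final claim is an atomic test-and-set on the bitmap, retried
if another CPU grabbed the bit first. A stripped-down sketch of that pattern
(grab_block_sketch is illustrative only; the real grab_block() above also
does the faster byte-at-a-time scan):

	static int grab_block_sketch(spinlock_t *lock, char *map,
				     unsigned size, int goal)
	{
		for (;;) {
			/* search without holding any lock - the bitmap may
			 * change under us, and that's fine */
			goal = ext2_find_next_zero_bit(map, size, goal);
			if (goal >= size)
				return -1;	/* group looks full */
			/* claim the bit atomically; a non-zero return means
			 * we lost the race, so keep searching from here */
			if (!ext2_set_bit_atomic(lock, goal, (void *) map))
				return goal;
		}
	}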
diff -uNr linux-2.5.64/fs/ext2/ialloc.c linux-2.5.64-ciba/fs/ext2/ialloc.c
--- linux-2.5.64/fs/ext2/ialloc.c Fri Mar 14 01:53:36 2003
+++ linux-2.5.64-ciba/fs/ext2/ialloc.c Mon Mar 17 13:26:05 2003
@@ -64,6 +64,68 @@
 }
 
 /*
+ * Speculatively reserve an inode in a blockgroup which used to have some
+ * spare ones. Later, when we come to actually claim the inode in the bitmap
+ * it may be that it was taken. In that case the allocator will undo this
+ * reservation and try again.
+ *
+ * The inode allocator does not physically alter the superblock. But we still
+ * set sb->s_dirt, because the superblock was "logically" altered - we need to
+ * go and add up the free inodes counts again and flush out the superblock.
+ */
+static void ext2_reserve_inode(struct super_block *sb, int group, int dir)
+{
+ struct ext2_group_desc * desc;
+ struct buffer_head *bh;
+
+ desc = ext2_get_group_desc(sb, group, &bh);
+ if (!desc) {
+ ext2_error(sb, "ext2_reserve_inode",
+ "can't get descriptor for group %d", group);
+ return;
+ }
+
+ spin_lock(&EXT2_SB(sb)->s_bgi[group].ialloc_lock);
+ desc->bg_free_inodes_count =
+ cpu_to_le16(le16_to_cpu(desc->bg_free_inodes_count) - 1);
+ if (dir) {
+ desc->bg_used_dirs_count =
+ cpu_to_le16(le16_to_cpu(desc->bg_used_dirs_count) + 1);
+ dcounter_add(&EXT2_SB(sb)->dirs_dc, 1);
+ }
+ spin_unlock(&EXT2_SB(sb)->s_bgi[group].ialloc_lock);
+ dcounter_add(&EXT2_SB(sb)->free_inodes_dc, -1);
+ sb->s_dirt = 1;
+ mark_buffer_dirty(bh);
+}
+
+static void ext2_release_inode(struct super_block *sb, int group, int dir)
+{
+ struct ext2_group_desc * desc;
+ struct buffer_head *bh;
+
+ desc = ext2_get_group_desc(sb, group, &bh);
+ if (!desc) {
+ ext2_error(sb, "ext2_release_inode",
+ "can't get descriptor for group %d", group);
+ return;
+ }
+
+ spin_lock(&EXT2_SB(sb)->s_bgi[group].ialloc_lock);
+ desc->bg_free_inodes_count =
+ cpu_to_le16(le16_to_cpu(desc->bg_free_inodes_count) + 1);
+ if (dir) {
+ desc->bg_used_dirs_count =
+ cpu_to_le16(le16_to_cpu(desc->bg_used_dirs_count) - 1);
+ dcounter_add(&EXT2_SB(sb)->dirs_dc, -1);
+ }
+ spin_unlock(&EXT2_SB(sb)->s_bgi[group].ialloc_lock);
+ dcounter_add(&EXT2_SB(sb)->free_inodes_dc, 1);
+ sb->s_dirt = 1;
+ mark_buffer_dirty(bh);
+}
+
+/*
  * NOTE! When we get the inode, we're the only people
  * that have access to it, and as such there are no
  * race conditions we have to worry about. The inode
@@ -85,10 +147,8 @@
         int is_directory;
         unsigned long ino;
         struct buffer_head *bitmap_bh = NULL;
- struct buffer_head *bh2;
         unsigned long block_group;
         unsigned long bit;
- struct ext2_group_desc * desc;
         struct ext2_super_block * es;
 
         ino = inode->i_ino;
@@ -105,7 +165,6 @@
                 DQUOT_DROP(inode);
         }
 
- lock_super (sb);
         es = EXT2_SB(sb)->s_es;
         is_directory = S_ISDIR(inode->i_mode);
 
@@ -126,32 +185,17 @@
                 goto error_return;
 
         /* Ok, now we can actually update the inode bitmaps.. */
- if (!ext2_clear_bit(bit, bitmap_bh->b_data))
+ if (!ext2_clear_bit_atomic(&EXT2_SB(sb)->s_bgi[block_group].ialloc_lock,
+ bit, (void *) bitmap_bh->b_data))
                 ext2_error (sb, "ext2_free_inode",
                               "bit already cleared for inode %lu", ino);
- else {
- desc = ext2_get_group_desc (sb, block_group, &bh2);
- if (desc) {
- desc->bg_free_inodes_count =
- cpu_to_le16(le16_to_cpu(desc->bg_free_inodes_count) + 1);
- if (is_directory) {
- desc->bg_used_dirs_count =
- cpu_to_le16(le16_to_cpu(desc->bg_used_dirs_count) - 1);
- EXT2_SB(sb)->s_dir_count--;
- }
- }
- mark_buffer_dirty(bh2);
- es->s_free_inodes_count =
- cpu_to_le32(le32_to_cpu(es->s_free_inodes_count) + 1);
- mark_buffer_dirty(EXT2_SB(sb)->s_sbh);
- }
+ else
+ ext2_release_inode(sb, block_group, is_directory);
         mark_buffer_dirty(bitmap_bh);
         if (sb->s_flags & MS_SYNCHRONOUS)
                 sync_dirty_buffer(bitmap_bh);
- sb->s_dirt = 1;
 error_return:
         brelse(bitmap_bh);
- unlock_super (sb);
 }
 
 /*
@@ -211,9 +255,8 @@
  */
 static int find_group_dir(struct super_block *sb, struct inode *parent)
 {
- struct ext2_super_block * es = EXT2_SB(sb)->s_es;
         int ngroups = EXT2_SB(sb)->s_groups_count;
- int avefreei = le32_to_cpu(es->s_free_inodes_count) / ngroups;
+ int avefreei = ext2_count_free_inodes(sb) / ngroups;
         struct ext2_group_desc *desc, *best_desc = NULL;
         struct buffer_head *bh, *best_bh = NULL;
         int group, best_group = -1;
@@ -234,11 +277,9 @@
         }
         if (!best_desc)
                 return -1;
- best_desc->bg_free_inodes_count =
- cpu_to_le16(le16_to_cpu(best_desc->bg_free_inodes_count) - 1);
- best_desc->bg_used_dirs_count =
- cpu_to_le16(le16_to_cpu(best_desc->bg_used_dirs_count) + 1);
- mark_buffer_dirty(best_bh);
+
+ ext2_reserve_inode(sb, best_group, 1);
+
         return best_group;
 }
 
@@ -277,10 +318,12 @@
         struct ext2_super_block *es = sbi->s_es;
         int ngroups = sbi->s_groups_count;
         int inodes_per_group = EXT2_INODES_PER_GROUP(sb);
- int avefreei = le32_to_cpu(es->s_free_inodes_count) / ngroups;
- int avefreeb = le32_to_cpu(es->s_free_blocks_count) / ngroups;
+ int freei = ext2_count_free_inodes(sb);
+ int avefreei = freei / ngroups;
+ int free_blocks = ext2_count_free_blocks(sb);
+ int avefreeb = free_blocks / ngroups;
         int blocks_per_dir;
- int ndirs = sbi->s_dir_count;
+ int ndirs = dcounter_value(&sbi->dirs_dc);
         int max_debt, max_dirs, min_blocks, min_inodes;
         int group = -1, i;
         struct ext2_group_desc *desc;
@@ -320,8 +363,7 @@
                 goto fallback;
         }
 
- blocks_per_dir = (le32_to_cpu(es->s_blocks_count) -
- le32_to_cpu(es->s_free_blocks_count)) / ndirs;
+ blocks_per_dir = (le32_to_cpu(es->s_blocks_count) - free_blocks) / ndirs;
 
         max_dirs = ndirs / ngroups + inodes_per_group / 16;
         min_inodes = avefreei - inodes_per_group / 4;
@@ -340,7 +382,7 @@
                 desc = ext2_get_group_desc (sb, group, &bh);
                 if (!desc || !desc->bg_free_inodes_count)
                         continue;
- if (sbi->s_debts[group] >= max_debt)
+ if (sbi->s_bgi[group].debts >= max_debt)
                         continue;
                 if (le16_to_cpu(desc->bg_used_dirs_count) >= max_dirs)
                         continue;
@@ -364,12 +406,8 @@
         return -1;
 
 found:
- desc->bg_free_inodes_count =
- cpu_to_le16(le16_to_cpu(desc->bg_free_inodes_count) - 1);
- desc->bg_used_dirs_count =
- cpu_to_le16(le16_to_cpu(desc->bg_used_dirs_count) + 1);
- sbi->s_dir_count++;
- mark_buffer_dirty(bh);
+ ext2_reserve_inode(sb, group, 1);
+
         return group;
 }
 
@@ -431,9 +469,8 @@
         return -1;
 
 found:
- desc->bg_free_inodes_count =
- cpu_to_le16(le16_to_cpu(desc->bg_free_inodes_count) - 1);
- mark_buffer_dirty(bh);
+ ext2_reserve_inode(sb, group, 0);
+
         return group;
 }
 
@@ -456,7 +493,6 @@
                 return ERR_PTR(-ENOMEM);
 
         ei = EXT2_I(inode);
- lock_super (sb);
         es = EXT2_SB(sb)->s_es;
 repeat:
         if (S_ISDIR(mode)) {
@@ -480,7 +516,12 @@
                                       EXT2_INODES_PER_GROUP(sb));
         if (i >= EXT2_INODES_PER_GROUP(sb))
                 goto bad_count;
- ext2_set_bit(i, bitmap_bh->b_data);
+ if (ext2_set_bit_atomic(&EXT2_SB(sb)->s_bgi[group].ialloc_lock,
+ i, (void *) bitmap_bh->b_data)) {
+ brelse(bitmap_bh);
+ ext2_release_inode(sb, group, S_ISDIR(mode));
+ goto repeat;
+ }
 
         mark_buffer_dirty(bitmap_bh);
         if (sb->s_flags & MS_SYNCHRONOUS)
@@ -497,19 +538,16 @@
                 goto fail2;
         }
 
- es->s_free_inodes_count =
- cpu_to_le32(le32_to_cpu(es->s_free_inodes_count) - 1);
-
+ spin_lock(&EXT2_SB(sb)->s_bgi[group].ialloc_lock);
         if (S_ISDIR(mode)) {
- if (EXT2_SB(sb)->s_debts[group] < 255)
- EXT2_SB(sb)->s_debts[group]++;
+ if (EXT2_SB(sb)->s_bgi[group].debts < 255)
+ EXT2_SB(sb)->s_bgi[group].debts++;
         } else {
- if (EXT2_SB(sb)->s_debts[group])
- EXT2_SB(sb)->s_debts[group]--;
+ if (EXT2_SB(sb)->s_bgi[group].debts)
+ EXT2_SB(sb)->s_bgi[group].debts--;
         }
+ spin_unlock(&EXT2_SB(sb)->s_bgi[group].ialloc_lock);
 
- mark_buffer_dirty(EXT2_SB(sb)->s_sbh);
- sb->s_dirt = 1;
         inode->i_uid = current->fsuid;
         if (test_opt (sb, GRPID))
                 inode->i_gid = dir->i_gid;
@@ -552,7 +590,6 @@
         inode->i_generation = EXT2_SB(sb)->s_next_generation++;
         insert_inode_hash(inode);
 
- unlock_super(sb);
         if(DQUOT_ALLOC_INODE(inode)) {
                 DQUOT_DROP(inode);
                 goto fail3;
@@ -574,15 +611,8 @@
         return ERR_PTR(err);
 
 fail2:
- desc = ext2_get_group_desc (sb, group, &bh2);
- desc->bg_free_inodes_count =
- cpu_to_le16(le16_to_cpu(desc->bg_free_inodes_count) + 1);
- if (S_ISDIR(mode))
- desc->bg_used_dirs_count =
- cpu_to_le16(le16_to_cpu(desc->bg_used_dirs_count) - 1);
- mark_buffer_dirty(bh2);
+ ext2_release_inode(sb, group, S_ISDIR(mode));
 fail:
- unlock_super(sb);
         make_bad_inode(inode);
         iput(inode);
         return ERR_PTR(err);
@@ -603,18 +633,26 @@
         goto repeat;
 }
 
-unsigned long ext2_count_free_inodes (struct super_block * sb)
+unsigned long ext2_count_free_inodes(struct super_block *sb)
+{
+ return dcounter_value(&EXT2_SB(sb)->free_inodes_dc);
+}
+
+unsigned long ext2_count_free_inodes_old(struct super_block *sb)
 {
+ struct ext2_group_desc *desc;
+ unsigned long desc_count = 0;
+ int i;
+
 #ifdef EXT2FS_DEBUG
         struct ext2_super_block * es;
- unsigned long desc_count = 0, bitmap_count = 0;
+ unsigned long bitmap_count = 0;
         struct buffer_head *bitmap_bh = NULL;
         int i;
 
         lock_super (sb);
         es = EXT2_SB(sb)->s_es;
         for (i = 0; i < EXT2_SB(sb)->s_groups_count; i++) {
- struct ext2_group_desc *desc;
                 unsigned x;
 
                 desc = ext2_get_group_desc (sb, i, NULL);
@@ -637,7 +675,13 @@
         unlock_super(sb);
         return desc_count;
 #else
- return le32_to_cpu(EXT2_SB(sb)->s_es->s_free_inodes_count);
+ for (i = 0; i < EXT2_SB(sb)->s_groups_count; i++) {
+ desc = ext2_get_group_desc (sb, i, NULL);
+ if (!desc)
+ continue;
+ desc_count += le16_to_cpu(desc->bg_free_inodes_count);
+ }
+ return desc_count;
 #endif
 }
 
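
The inode side uses the same speculate-then-claim pattern: the group's free
inode count is decremented up front under ialloc_lock (ext2_reserve_inode,
called from find_group_dir/find_group_other), the bitmap bit is then taken
with ext2_set_bit_atomic, and a lost race undoes the reservation and retries.
Condensed into one helper for illustration (claim_inode_in_group is a made-up
name; the real logic lives inline in ext2_new_inode above):

	static int claim_inode_in_group(struct super_block *sb, int group,
					struct buffer_head *bitmap_bh, int is_dir)
	{
		struct ext2_sb_info *sbi = EXT2_SB(sb);
		int i;

		/* a find_group_*() helper already did
		 * ext2_reserve_inode(sb, group, is_dir) for us */
		i = ext2_find_first_zero_bit(bitmap_bh->b_data,
					     EXT2_INODES_PER_GROUP(sb));
		if (i >= EXT2_INODES_PER_GROUP(sb))
			return -1;	/* descriptor/bitmap mismatch; the real
					 * code reports this via ext2_error() */
		if (ext2_set_bit_atomic(&sbi->s_bgi[group].ialloc_lock,
					i, (void *) bitmap_bh->b_data)) {
			/* raced with another allocator: give the speculative
			 * reservation back so the caller can pick again */
			ext2_release_inode(sb, group, is_dir);
			return -1;
		}
		return i;	/* the inode bit we now own within the group */
	}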
diff -uNr linux-2.5.64/fs/ext2/super.c linux-2.5.64-ciba/fs/ext2/super.c
--- linux-2.5.64/fs/ext2/super.c Thu Feb 20 16:18:53 2003
+++ linux-2.5.64-ciba/fs/ext2/super.c Mon Mar 17 13:26:05 2003
@@ -35,6 +35,8 @@
                             struct ext2_super_block *es);
 static int ext2_remount (struct super_block * sb, int * flags, char * data);
 static int ext2_statfs (struct super_block * sb, struct statfs * buf);
+unsigned long ext2_count_free_inodes_old(struct super_block *sb);
+unsigned long ext2_count_free_blocks_old (struct super_block * sb);
 
 static char error_buf[1024];
 
@@ -141,7 +143,7 @@
                 if (sbi->s_group_desc[i])
                         brelse (sbi->s_group_desc[i]);
         kfree(sbi->s_group_desc);
- kfree(sbi->s_debts);
+ kfree(sbi->s_bgi);
         brelse (sbi->s_sbh);
         sb->s_fs_info = NULL;
         kfree(sbi);
@@ -464,8 +466,11 @@
         int i;
         int desc_block = 0;
         struct ext2_sb_info *sbi = EXT2_SB(sb);
- unsigned long block = le32_to_cpu(sbi->s_es->s_first_data_block);
+ struct ext2_super_block * es = sbi->s_es;
+ unsigned long block = le32_to_cpu(es->s_first_data_block);
         struct ext2_group_desc * gdp = NULL;
+ unsigned int total_free = 0, free;
+ unsigned int reserved = le32_to_cpu(es->s_r_blocks_count);
 
         ext2_debug ("Checking group descriptors");
 
@@ -504,6 +509,30 @@
                 block += EXT2_BLOCKS_PER_GROUP(sb);
                 gdp++;
         }
+
+ total_free = le32_to_cpu (es->s_free_blocks_count);
+ dcounter_init(&EXT2_SB(sb)->free_blocks_dc, total_free, 0);
+ dcounter_init(&EXT2_SB(sb)->free_inodes_dc,
+ le32_to_cpu (es->s_free_inodes_count), 0);
+ dcounter_init(&EXT2_SB(sb)->dirs_dc, ext2_count_dirs(sb), 1);
+
+ /* distribute reserved blocks over groups -bzzz */
+ for(i = sbi->s_groups_count-1; reserved && total_free && i >= 0; i--) {
+ gdp = ext2_get_group_desc (sb, i, NULL);
+ if (!gdp) {
+ ext2_error (sb, "ext2_check_descriptors",
+ "cant get descriptor for group %d", i);
+ return 0;
+ }
+
+ free = le16_to_cpu(gdp->bg_free_blocks_count);
+ if (free > reserved)
+ free = reserved;
+ sbi->s_bgi[i].reserved = free;
+ reserved -= free;
+ total_free -= free;
+ }
+
         return 1;
 }
 
@@ -768,13 +797,18 @@
                 printk ("EXT2-fs: not enough memory\n");
                 goto failed_mount;
         }
- sbi->s_debts = kmalloc(sbi->s_groups_count * sizeof(*sbi->s_debts),
+ sbi->s_bgi = kmalloc(sbi->s_groups_count*sizeof(struct ext2_bg_info),
                                GFP_KERNEL);
- if (!sbi->s_debts) {
+ if (!sbi->s_bgi) {
                 printk ("EXT2-fs: not enough memory\n");
                 goto failed_mount_group_desc;
         }
- memset(sbi->s_debts, 0, sbi->s_groups_count * sizeof(*sbi->s_debts));
+ for (i = 0; i < sbi->s_groups_count; i++) {
+ sbi->s_bgi[i].debts = 0;
+ sbi->s_bgi[i].reserved = 0;
+ spin_lock_init(&sbi->s_bgi[i].balloc_lock);
+ spin_lock_init(&sbi->s_bgi[i].ialloc_lock);
+ }
         for (i = 0; i < db_count; i++) {
                 block = descriptor_loc(sb, logic_sb_block, i);
                 sbi->s_group_desc[i] = sb_bread(sb, block);
@@ -820,8 +854,8 @@
                 brelse(sbi->s_group_desc[i]);
 failed_mount_group_desc:
         kfree(sbi->s_group_desc);
- if (sbi->s_debts)
- kfree(sbi->s_debts);
+ if (sbi->s_bgi)
+ kfree(sbi->s_bgi);
 failed_mount:
         brelse(bh);
 failed_sbi:
@@ -840,6 +874,22 @@
 
 static void ext2_sync_super(struct super_block *sb, struct ext2_super_block *es)
 {
+ if (dcounter_value(&EXT2_SB(sb)->dirs_dc) != ext2_count_dirs(sb))
+ printk("EXT2-fs: invalid dirs_dc %d (real %d)\n",
+ (int) dcounter_value(&EXT2_SB(sb)->dirs_dc),
+ (int) ext2_count_dirs(sb));
+ if (ext2_count_free_blocks(sb) != ext2_count_free_blocks_old(sb))
+ printk("EXT2-fs: invalid free blocks dcounter %d (real %d)\n",
+ (int) ext2_count_free_blocks(sb),
+ (int) ext2_count_free_blocks_old(sb));
+ es->s_free_blocks_count = cpu_to_le32(ext2_count_free_blocks(sb));
+
+ if (ext2_count_free_inodes(sb) != ext2_count_free_inodes_old(sb))
+ printk("EXT2-fs: invalid free inodes dcounter %d (real %d)\n",
+ (int) ext2_count_free_inodes(sb),
+ (int) ext2_count_free_inodes_old(sb));
+ es->s_free_inodes_count = cpu_to_le32(ext2_count_free_inodes(sb));
+
         es->s_wtime = cpu_to_le32(get_seconds());
         mark_buffer_dirty(EXT2_SB(sb)->s_sbh);
         sync_dirty_buffer(EXT2_SB(sb)->s_sbh);
@@ -868,6 +918,25 @@
                         ext2_debug ("setting valid to 0\n");
                         es->s_state = cpu_to_le16(le16_to_cpu(es->s_state) &
                                                   ~EXT2_VALID_FS);
+ if (dcounter_value(&EXT2_SB(sb)->dirs_dc) != ext2_count_dirs(sb))
+ printk("EXT2-fs: invalid dirs_dc %d (real %d)\n",
+ (int) dcounter_value(&EXT2_SB(sb)->dirs_dc),
+ (int) ext2_count_dirs(sb));
+
+ es->s_free_blocks_count =
+ cpu_to_le32(ext2_count_free_blocks(sb));
+ if (ext2_count_free_blocks(sb) != ext2_count_free_blocks_old(sb))
+ printk("EXT2-fs: invalid free blocks dcounter %d (real %d)\n",
+ (int)ext2_count_free_blocks(sb),
+ (int)ext2_count_free_blocks_old(sb));
+
+ es->s_free_inodes_count =
+ cpu_to_le32(ext2_count_free_inodes(sb));
+ if (ext2_count_free_inodes(sb) != ext2_count_free_inodes_old(sb))
+ printk("EXT2-fs: invalid free inodes dcounter %d (real %d)\n",
+ (int)ext2_count_free_inodes(sb),
+ (int)ext2_count_free_inodes_old(sb));
+
                         es->s_mtime = cpu_to_le32(get_seconds());
                         ext2_sync_super(sb, es);
                 } else
@@ -929,7 +998,8 @@
 static int ext2_statfs (struct super_block * sb, struct statfs * buf)
 {
         struct ext2_sb_info *sbi = EXT2_SB(sb);
- unsigned long overhead;
+ unsigned long overhead, total_free = 0;
+ struct ext2_group_desc *desc;
         int i;
 
         if (test_opt (sb, MINIX_DF))
@@ -950,9 +1020,14 @@
                  * block group descriptors. If the sparse superblocks
                  * feature is turned on, then not all groups have this.
                  */
- for (i = 0; i < sbi->s_groups_count; i++)
+ for (i = 0; i < sbi->s_groups_count; i++) {
                         overhead += ext2_bg_has_super(sb, i) +
                                 ext2_bg_num_gdb(sb, i);
+
+ /* sum total free blocks -bzzz */
+ desc = ext2_get_group_desc (sb, i, NULL);
+ total_free += le16_to_cpu(desc->bg_free_blocks_count);
+ }
 
                 /*
                  * Every block group has an inode bitmap, a block
@@ -965,7 +1040,7 @@
         buf->f_type = EXT2_SUPER_MAGIC;
         buf->f_bsize = sb->s_blocksize;
         buf->f_blocks = le32_to_cpu(sbi->s_es->s_blocks_count) - overhead;
- buf->f_bfree = ext2_count_free_blocks (sb);
+ buf->f_bfree = total_free;
         buf->f_bavail = buf->f_bfree - le32_to_cpu(sbi->s_es->s_r_blocks_count);
         if (buf->f_bfree < le32_to_cpu(sbi->s_es->s_r_blocks_count))
                 buf->f_bavail = 0;
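
Note how the root reserve changes: instead of one global s_r_blocks_count
threshold checked under lock_super, ext2_check_descriptors() spreads the
reserve over the groups starting from the last one, group_reserve_blocks()
skips groups that are already down to their bgi->reserved share on the first
pass, and ext2_new_block() retries with use_reserve once no unreserved space
is left, with the usual CAP_SYS_RESOURCE/resuid/resgid check still guarding
the reserve itself. A made-up example of the distribution walk:

	/* hypothetical numbers: s_r_blocks_count = 1200, the last groups
	 * have 900, 500 and 300 free blocks; walking from the last group:
	 *
	 *   group N   : reserved = min(900, 1200) = 900, 300 left to place
	 *   group N-1 : reserved = min(500,  300) = 300,   0 left to place
	 *   group N-2 and below: reserved = 0
	 *
	 * ordinary allocations in a tail group are clamped to
	 * free - reserved, so only CAP_SYS_RESOURCE/resuid/resgid tasks
	 * (on the second, use_reserve pass) can eat into those blocks.
	 */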
diff -uNr linux-2.5.64/include/asm-alpha/bitops.h linux-2.5.64-ciba/include/asm-alpha/bitops.h
--- linux-2.5.64/include/asm-alpha/bitops.h Fri Mar 14 01:53:36 2003
+++ linux-2.5.64-ciba/include/asm-alpha/bitops.h Mon Mar 17 13:22:58 2003
@@ -487,7 +487,9 @@
 
 
 #define ext2_set_bit __test_and_set_bit
+#define ext2_set_bit_atomic(l,n,a) test_and_set_bit(n,a)
 #define ext2_clear_bit __test_and_clear_bit
+#define ext2_clear_bit_atomic(l,n,a) test_and_clear_bit(n,a)
 #define ext2_test_bit test_bit
 #define ext2_find_first_zero_bit find_first_zero_bit
 #define ext2_find_next_zero_bit find_next_zero_bit
diff -uNr linux-2.5.64/include/asm-arm/bitops.h linux-2.5.64-ciba/include/asm-arm/bitops.h
--- linux-2.5.64/include/asm-arm/bitops.h Fri Mar 14 01:53:36 2003
+++ linux-2.5.64-ciba/include/asm-arm/bitops.h Mon Mar 17 13:22:58 2003
@@ -357,8 +357,12 @@
  */
 #define ext2_set_bit(nr,p) \
                 __test_and_set_bit(WORD_BITOFF_TO_LE(nr), (unsigned long *)(p))
+#define ext2_set_bit_atomic(lock,nr,p) \
+ test_and_set_bit(WORD_BITOFF_TO_LE(nr), (unsigned long *)(p))
 #define ext2_clear_bit(nr,p) \
                 __test_and_clear_bit(WORD_BITOFF_TO_LE(nr), (unsigned long *)(p))
+#define ext2_clear_bit_atomic(lock,nr,p) \
+ test_and_clear_bit(WORD_BITOFF_TO_LE(nr), (unsigned long *)(p))
 #define ext2_test_bit(nr,p) \
                 __test_bit(WORD_BITOFF_TO_LE(nr), (unsigned long *)(p))
 #define ext2_find_first_zero_bit(p,sz) \
diff -uNr linux-2.5.64/include/asm-cris/bitops.h linux-2.5.64-ciba/include/asm-cris/bitops.h
--- linux-2.5.64/include/asm-cris/bitops.h Mon Nov 11 06:28:30 2002
+++ linux-2.5.64-ciba/include/asm-cris/bitops.h Mon Mar 17 13:22:58 2003
@@ -360,7 +360,9 @@
 #define hweight8(x) generic_hweight8(x)
 
 #define ext2_set_bit test_and_set_bit
+#define ext2_set_bit_atomic(l,n,a) test_and_set_bit(n,a)
 #define ext2_clear_bit test_and_clear_bit
+#define ext2_clear_bit_atomic(l,n,a) test_and_clear_bit(n,a)
 #define ext2_test_bit test_bit
 #define ext2_find_first_zero_bit find_first_zero_bit
 #define ext2_find_next_zero_bit find_next_zero_bit
diff -uNr linux-2.5.64/include/asm-i386/bitops.h linux-2.5.64-ciba/include/asm-i386/bitops.h
--- linux-2.5.64/include/asm-i386/bitops.h Wed Dec 25 06:03:08 2002
+++ linux-2.5.64-ciba/include/asm-i386/bitops.h Mon Mar 17 13:22:58 2003
@@ -479,8 +479,12 @@
 
 #define ext2_set_bit(nr,addr) \
         __test_and_set_bit((nr),(unsigned long*)addr)
+#define ext2_set_bit_atomic(lock,nr,addr) \
+ test_and_set_bit((nr),(unsigned long*)addr)
 #define ext2_clear_bit(nr, addr) \
         __test_and_clear_bit((nr),(unsigned long*)addr)
+#define ext2_clear_bit_atomic(lock,nr, addr) \
+ test_and_clear_bit((nr),(unsigned long*)addr)
 #define ext2_test_bit(nr, addr) test_bit((nr),(unsigned long*)addr)
 #define ext2_find_first_zero_bit(addr, size) \
         find_first_zero_bit((unsigned long*)addr, size)
diff -uNr linux-2.5.64/include/asm-ia64/bitops.h linux-2.5.64-ciba/include/asm-ia64/bitops.h
--- linux-2.5.64/include/asm-ia64/bitops.h Thu Feb 20 16:18:21 2003
+++ linux-2.5.64-ciba/include/asm-ia64/bitops.h Mon Mar 17 13:22:58 2003
@@ -453,7 +453,9 @@
 #define __clear_bit(nr, addr) clear_bit(nr, addr)
 
 #define ext2_set_bit test_and_set_bit
+#define ext2_set_bit_atomic(l,n,a) test_and_set_bit(n,a)
 #define ext2_clear_bit test_and_clear_bit
+#define ext2_clear_bit_atomic(l,n,a) test_and_clear_bit(n,a)
 #define ext2_test_bit test_bit
 #define ext2_find_first_zero_bit find_first_zero_bit
 #define ext2_find_next_zero_bit find_next_zero_bit
diff -uNr linux-2.5.64/include/asm-m68k/bitops.h linux-2.5.64-ciba/include/asm-m68k/bitops.h
--- linux-2.5.64/include/asm-m68k/bitops.h Mon Nov 11 06:28:33 2002
+++ linux-2.5.64-ciba/include/asm-m68k/bitops.h Mon Mar 17 13:23:28 2003
@@ -365,6 +365,24 @@
         return retval;
 }
 
+#define ext2_set_bit_atomic(lock, nr, addr) \
+ ({ \
+ int ret; \
+ spin_lock(lock); \
+ ret = ext2_set_bit((nr), (addr)); \
+ spin_unlock(lock); \
+ ret; \
+ })
+
+#define ext2_clear_bit_atomic(lock, nr, addr) \
+ ({ \
+ int ret; \
+ spin_lock(lock); \
+ ret = ext2_clear_bit((nr), (addr)); \
+ spin_unlock(lock); \
+ ret; \
+ })
+
 extern __inline__ int
 ext2_test_bit (int nr, const volatile void *vaddr)
 {
diff -uNr linux-2.5.64/include/asm-m68knommu/bitops.h linux-2.5.64-ciba/include/asm-m68knommu/bitops.h
--- linux-2.5.64/include/asm-m68knommu/bitops.h Mon Nov 11 06:28:04 2002
+++ linux-2.5.64-ciba/include/asm-m68knommu/bitops.h Mon Mar 17 13:23:31 2003
@@ -402,6 +402,24 @@
         return retval;
 }
 
+#define ext2_set_bit_atomic(lock, nr, addr) \
+ ({ \
+ int ret; \
+ spin_lock(lock); \
+ ret = ext2_set_bit((nr), (addr)); \
+ spin_unlock(lock); \
+ ret; \
+ })
+
+#define ext2_clear_bit_atomic(lock, nr, addr) \
+ ({ \
+ int ret; \
+ spin_lock(lock); \
+ ret = ext2_clear_bit((nr), (addr)); \
+ spin_unlock(lock); \
+ ret; \
+ })
+
 extern __inline__ int ext2_test_bit(int nr, const volatile void * addr)
 {
         int mask;
diff -uNr linux-2.5.64/include/asm-mips/bitops.h linux-2.5.64-ciba/include/asm-mips/bitops.h
--- linux-2.5.64/include/asm-mips/bitops.h Mon Nov 11 06:28:03 2002
+++ linux-2.5.64-ciba/include/asm-mips/bitops.h Mon Mar 17 13:23:22 2003
@@ -824,6 +824,24 @@
         return retval;
 }
 
+#define ext2_set_bit_atomic(lock, nr, addr) \
+ ({ \
+ int ret; \
+ spin_lock(lock); \
+ ret = ext2_set_bit((nr), (addr)); \
+ spin_unlock(lock); \
+ ret; \
+ })
+
+#define ext2_clear_bit_atomic(lock, nr, addr) \
+ ({ \
+ int ret; \
+ spin_lock(lock); \
+ ret = ext2_clear_bit((nr), (addr)); \
+ spin_unlock(lock); \
+ ret; \
+ })
+
 extern __inline__ int ext2_test_bit(int nr, const void * addr)
 {
         int mask;
@@ -890,7 +908,9 @@
 
 /* Native ext2 byte ordering, just collapse using defines. */
 #define ext2_set_bit(nr, addr) test_and_set_bit((nr), (addr))
+#define ext2_set_bit_atomic(lock, nr, addr) test_and_set_bit((nr), (addr))
 #define ext2_clear_bit(nr, addr) test_and_clear_bit((nr), (addr))
+#define ext2_clear_bit_atomic(lock, nr, addr) test_and_clear_bit((nr), (addr))
 #define ext2_test_bit(nr, addr) test_bit((nr), (addr))
 #define ext2_find_first_zero_bit(addr, size) find_first_zero_bit((addr), (size))
 #define ext2_find_next_zero_bit(addr, size, offset) \
diff -uNr linux-2.5.64/include/asm-mips64/bitops.h linux-2.5.64-ciba/include/asm-mips64/bitops.h
--- linux-2.5.64/include/asm-mips64/bitops.h Mon Nov 11 06:28:29 2002
+++ linux-2.5.64-ciba/include/asm-mips64/bitops.h Mon Mar 17 13:23:25 2003
@@ -531,6 +531,24 @@
         return retval;
 }
 
+#define ext2_set_bit_atomic(lock, nr, addr) \
+ ({ \
+ int ret; \
+ spin_lock(lock); \
+ ret = ext2_set_bit((nr), (addr)); \
+ spin_unlock(lock); \
+ ret; \
+ })
+
+#define ext2_clear_bit_atomic(lock, nr, addr) \
+ ({ \
+ int ret; \
+ spin_lock(lock); \
+ ret = ext2_clear_bit((nr), (addr)); \
+ spin_unlock(lock); \
+ ret; \
+ })
+
 extern inline int
 ext2_test_bit(int nr, const void * addr)
 {
@@ -599,7 +617,9 @@
 
 /* Native ext2 byte ordering, just collapse using defines. */
 #define ext2_set_bit(nr, addr) test_and_set_bit((nr), (addr))
+#define ext2_set_bit_atomic(lock, nr, addr) test_and_set_bit((nr), (addr))
 #define ext2_clear_bit(nr, addr) test_and_clear_bit((nr), (addr))
+#define ext2_clear_bit_atomic(lock, nr, addr) test_and_clear_bit((nr), (addr))
 #define ext2_test_bit(nr, addr) test_bit((nr), (addr))
 #define ext2_find_first_zero_bit(addr, size) find_first_zero_bit((addr), (size))
 #define ext2_find_next_zero_bit(addr, size, offset) \
diff -uNr linux-2.5.64/include/asm-parisc/bitops.h linux-2.5.64-ciba/include/asm-parisc/bitops.h
--- linux-2.5.64/include/asm-parisc/bitops.h Thu Feb 20 16:18:21 2003
+++ linux-2.5.64-ciba/include/asm-parisc/bitops.h Mon Mar 17 13:22:58 2003
@@ -389,10 +389,14 @@
  */
 #ifdef __LP64__
 #define ext2_set_bit(nr, addr) test_and_set_bit((nr) ^ 0x38, addr)
+#define ext2_set_bit_atomic(l,nr,addr) test_and_set_bit((nr) ^ 0x38, addr)
 #define ext2_clear_bit(nr, addr) test_and_clear_bit((nr) ^ 0x38, addr)
+#define ext2_clear_bit_atomic(l,nr,addr) test_and_clear_bit((nr) ^ 0x38, addr)
 #else
 #define ext2_set_bit(nr, addr) test_and_set_bit((nr) ^ 0x18, addr)
+#define ext2_set_bit_atomic(l,nr,addr) test_and_set_bit((nr) ^ 0x18, addr)
 #define ext2_clear_bit(nr, addr) test_and_clear_bit((nr) ^ 0x18, addr)
+#define ext2_clear_bit_atomic(l,nr,addr) test_and_clear_bit((nr) ^ 0x18, addr)
 #endif
 
 #endif /* __KERNEL__ */
diff -uNr linux-2.5.64/include/asm-ppc/bitops.h linux-2.5.64-ciba/include/asm-ppc/bitops.h
--- linux-2.5.64/include/asm-ppc/bitops.h Mon Jan 20 05:23:05 2003
+++ linux-2.5.64-ciba/include/asm-ppc/bitops.h Mon Mar 17 13:22:58 2003
@@ -392,7 +392,9 @@
 
 
 #define ext2_set_bit(nr, addr) __test_and_set_bit((nr) ^ 0x18, (unsigned long *)(addr))
+#define ext2_set_bit_atomic(lock, nr, addr) test_and_set_bit((nr) ^ 0x18, (unsigned long *)(addr))
 #define ext2_clear_bit(nr, addr) __test_and_clear_bit((nr) ^ 0x18, (unsigned long *)(addr))
+#define ext2_clear_bit_atomic(lock, nr, addr) test_and_clear_bit((nr) ^ 0x18, (unsigned long *)(addr))
 
 static __inline__ int ext2_test_bit(int nr, __const__ void * addr)
 {
diff -uNr linux-2.5.64/include/asm-ppc64/bitops.h linux-2.5.64-ciba/include/asm-ppc64/bitops.h
--- linux-2.5.64/include/asm-ppc64/bitops.h Mon Nov 11 06:28:28 2002
+++ linux-2.5.64-ciba/include/asm-ppc64/bitops.h Mon Mar 17 13:23:17 2003
@@ -338,6 +338,25 @@
         __test_and_set_le_bit((nr),(unsigned long*)addr)
 #define ext2_clear_bit(nr, addr) \
         __test_and_clear_le_bit((nr),(unsigned long*)addr)
+
+#define ext2_set_bit_atomic(lock, nr, addr) \
+ ({ \
+ int ret; \
+ spin_lock(lock); \
+ ret = ext2_set_bit((nr), (addr)); \
+ spin_unlock(lock); \
+ ret; \
+ })
+
+#define ext2_clear_bit_atomic(lock, nr, addr) \
+ ({ \
+ int ret; \
+ spin_lock(lock); \
+ ret = ext2_clear_bit((nr), (addr)); \
+ spin_unlock(lock); \
+ ret; \
+ })
+
 #define ext2_test_bit(nr, addr) test_le_bit((nr),(unsigned long*)addr)
 #define ext2_find_first_zero_bit(addr, size) \
         find_first_zero_le_bit((unsigned long*)addr, size)
diff -uNr linux-2.5.64/include/asm-s390/bitops.h linux-2.5.64-ciba/include/asm-s390/bitops.h
--- linux-2.5.64/include/asm-s390/bitops.h Fri Mar 14 01:53:27 2003
+++ linux-2.5.64-ciba/include/asm-s390/bitops.h Mon Mar 17 13:22:58 2003
@@ -805,8 +805,12 @@
 
 #define ext2_set_bit(nr, addr) \
         test_and_set_bit((nr)^24, (unsigned long *)addr)
+#define ext2_set_bit_atomic(lock, nr, addr) \
+ test_and_set_bit((nr)^24, (unsigned long *)addr)
 #define ext2_clear_bit(nr, addr) \
         test_and_clear_bit((nr)^24, (unsigned long *)addr)
+#define ext2_clear_bit_atomic(lock, nr, addr) \
+ test_and_clear_bit((nr)^24, (unsigned long *)addr)
 #define ext2_test_bit(nr, addr) \
         test_bit((nr)^24, (unsigned long *)addr)
 
diff -uNr linux-2.5.64/include/asm-s390x/bitops.h linux-2.5.64-ciba/include/asm-s390x/bitops.h
--- linux-2.5.64/include/asm-s390x/bitops.h Fri Mar 14 01:53:27 2003
+++ linux-2.5.64-ciba/include/asm-s390x/bitops.h Mon Mar 17 13:22:58 2003
@@ -838,8 +838,12 @@
 
 #define ext2_set_bit(nr, addr) \
         test_and_set_bit((nr)^56, (unsigned long *)addr)
+#define ext2_set_bit_atomic(lock, nr, addr) \
+ test_and_set_bit((nr)^56, (unsigned long *)addr)
 #define ext2_clear_bit(nr, addr) \
         test_and_clear_bit((nr)^56, (unsigned long *)addr)
+#define ext2_clear_bit_atomic(lock, nr, addr) \
+ test_and_clear_bit((nr)^56, (unsigned long *)addr)
 #define ext2_test_bit(nr, addr) \
         test_bit((nr)^56, (unsigned long *)addr)
 
diff -uNr linux-2.5.64/include/asm-sh/bitops.h linux-2.5.64-ciba/include/asm-sh/bitops.h
--- linux-2.5.64/include/asm-sh/bitops.h Mon Nov 11 06:28:02 2002
+++ linux-2.5.64-ciba/include/asm-sh/bitops.h Mon Mar 17 13:23:33 2003
@@ -344,6 +344,24 @@
 }
 #endif
 
+#define ext2_set_bit_atomic(lock, nr, addr) \
+ ({ \
+ int ret; \
+ spin_lock(lock); \
+ ret = ext2_set_bit((nr), (addr)); \
+ spin_unlock(lock); \
+ ret; \
+ })
+
+#define ext2_clear_bit_atomic(lock, nr, addr) \
+ ({ \
+ int ret; \
+ spin_lock(lock); \
+ ret = ext2_clear_bit((nr), (addr)); \
+ spin_unlock(lock); \
+ ret; \
+ })
+
 /* Bitmap functions for the minix filesystem. */
 #define minix_test_and_set_bit(nr,addr) test_and_set_bit(nr,addr)
 #define minix_set_bit(nr,addr) set_bit(nr,addr)
diff -uNr linux-2.5.64/include/asm-sparc/bitops.h linux-2.5.64-ciba/include/asm-sparc/bitops.h
--- linux-2.5.64/include/asm-sparc/bitops.h Mon Jan 20 05:23:05 2003
+++ linux-2.5.64-ciba/include/asm-sparc/bitops.h Mon Mar 17 13:23:19 2003
@@ -455,6 +455,25 @@
 
 #define ext2_set_bit __test_and_set_le_bit
 #define ext2_clear_bit __test_and_clear_le_bit
+
+#define ext2_set_bit_atomic(lock, nr, addr) \
+ ({ \
+ int ret; \
+ spin_lock(lock); \
+ ret = ext2_set_bit((nr), (addr)); \
+ spin_unlock(lock); \
+ ret; \
+ })
+
+#define ext2_clear_bit_atomic(lock, nr, addr) \
+ ({ \
+ int ret; \
+ spin_lock(lock); \
+ ret = ext2_clear_bit((nr), (addr)); \
+ spin_unlock(lock); \
+ ret; \
+ })
+
 #define ext2_test_bit test_le_bit
 #define ext2_find_first_zero_bit find_first_zero_le_bit
 #define ext2_find_next_zero_bit find_next_zero_le_bit
diff -uNr linux-2.5.64/include/asm-sparc64/bitops.h linux-2.5.64-ciba/include/asm-sparc64/bitops.h
--- linux-2.5.64/include/asm-sparc64/bitops.h Mon Nov 11 06:28:05 2002
+++ linux-2.5.64-ciba/include/asm-sparc64/bitops.h Mon Mar 17 13:22:58 2003
@@ -351,7 +351,9 @@
 #ifdef __KERNEL__
 
 #define ext2_set_bit(nr,addr) test_and_set_le_bit((nr),(unsigned long *)(addr))
+#define ext2_set_bit_atomic(lock,nr,addr) test_and_set_le_bit((nr),(unsigned long *)(addr))
 #define ext2_clear_bit(nr,addr) test_and_clear_le_bit((nr),(unsigned long *)(addr))
+#define ext2_clear_bit_atomic(lock,nr,addr) test_and_clear_le_bit((nr),(unsigned long *)(addr))
 #define ext2_test_bit(nr,addr) test_le_bit((nr),(unsigned long *)(addr))
 #define ext2_find_first_zero_bit(addr, size) \
         find_first_zero_le_bit((unsigned long *)(addr), (size))
diff -uNr linux-2.5.64/include/asm-v850/bitops.h linux-2.5.64-ciba/include/asm-v850/bitops.h
--- linux-2.5.64/include/asm-v850/bitops.h Mon Nov 11 06:28:02 2002
+++ linux-2.5.64-ciba/include/asm-v850/bitops.h Mon Mar 17 13:22:58 2003
@@ -252,7 +252,9 @@
 #define hweight8(x) generic_hweight8 (x)
 
 #define ext2_set_bit test_and_set_bit
+#define ext2_set_bit_atomic(l,n,a) test_and_set_bit(n,a)
 #define ext2_clear_bit test_and_clear_bit
+#define ext2_clear_bit_atomic(l,n,a) test_and_clear_bit(n,a)
 #define ext2_test_bit test_bit
 #define ext2_find_first_zero_bit find_first_zero_bit
 #define ext2_find_next_zero_bit find_next_zero_bit
diff -uNr linux-2.5.64/include/asm-x86_64/bitops.h linux-2.5.64-ciba/include/asm-x86_64/bitops.h
--- linux-2.5.64/include/asm-x86_64/bitops.h Fri Mar 14 01:53:27 2003
+++ linux-2.5.64-ciba/include/asm-x86_64/bitops.h Mon Mar 17 13:22:58 2003
@@ -487,8 +487,12 @@
 
 #define ext2_set_bit(nr,addr) \
         __test_and_set_bit((nr),(unsigned long*)addr)
+#define ext2_set_bit_atomic(lock,nr,addr) \
+ test_and_set_bit((nr),(unsigned long*)addr)
 #define ext2_clear_bit(nr, addr) \
         __test_and_clear_bit((nr),(unsigned long*)addr)
+#define ext2_clear_bit_atomic(lock,nr,addr) \
+ test_and_clear_bit((nr),(unsigned long*)addr)
 #define ext2_test_bit(nr, addr) test_bit((nr),(unsigned long*)addr)
 #define ext2_find_first_zero_bit(addr, size) \
         find_first_zero_bit((unsigned long*)addr, size)
diff -uNr linux-2.5.64/include/linux/dcounter.h linux-2.5.64-ciba/include/linux/dcounter.h
--- linux-2.5.64/include/linux/dcounter.h Thu Jan 1 03:00:00 1970
+++ linux-2.5.64-ciba/include/linux/dcounter.h Mon Mar 17 13:26:05 2003
@@ -0,0 +1,85 @@
+#ifndef _DCOUNTER_H_
+#define _DCOUNTER_H_
+/*
+ * Distributed counters:
+ *
+ * Problem:
+ * 1) we have to support global counter for some subsystems
+ * for example, ext2
+ * 2) we do not want to use spinlocks/atomic_t because of cache ping-pong
+ * 3) counter may have some fluctuation
+ * for example, number of free blocks in ext2
+ *
+ * Solution:
+ * 1) there is 'base' counter
+ * 2) each CPU supports own 'diff'
+ * 3) global value calculated as sum of base and all diff'es
+ * 4) sometimes a diff is folded into the base to prevent int overflow.
+ * this 'synchronization' uses a seqlock
+ *
+ *
+ * written by Alex Tomas <bzzz@tmi.comex.ru>
+ */
+
+#include <linux/smp.h>
+#include <linux/seqlock.h>
+#include <linux/string.h>
+
+#define DCOUNTER_MAX_DIFF ((1 << 31) / NR_CPUS - 1000)
+
+struct dcounter_diff {
+ long dd_value;
+} ____cacheline_aligned_in_smp;
+
+struct dcounter {
+ long dc_base;
+ long dc_min;
+ struct dcounter_diff dc_diff[NR_CPUS];
+ seqlock_t dc_lock;
+};
+
+static inline void dcounter_init(struct dcounter *dc, int value, int min)
+{
+ seqlock_init(&dc->dc_lock);
+ dc->dc_base = value;
+ dc->dc_min = min;
+ memset(dc->dc_diff, 0, sizeof(struct dcounter_diff) * NR_CPUS);
+}
+
+static inline int dcounter_value(struct dcounter *dc)
+{
+ int i;
+ int counter;
+ int seq;
+
+ do {
+ seq = read_seqbegin(&dc->dc_lock);
+ counter = dc->dc_base;
+ for (i = 0; i < NR_CPUS; i++)
+ counter += dc->dc_diff[i].dd_value;
+ } while (read_seqretry(&dc->dc_lock, seq));
+
+ if (counter < dc->dc_min)
+ counter = dc->dc_min;
+ return counter;
+}
+
+static inline void dcounter_add(struct dcounter *dc, int value)
+{
+ int cpu;
+
+ preempt_disable();
+ cpu = smp_processor_id();
+ dc->dc_diff[cpu].dd_value += value;
+ if (dc->dc_diff[cpu].dd_value > DCOUNTER_MAX_DIFF ||
+ dc->dc_diff[cpu].dd_value < -DCOUNTER_MAX_DIFF) {
+ write_seqlock(&dc->dc_lock);
+ dc->dc_base += dc->dc_diff[cpu].dd_value;
+ dc->dc_diff[cpu].dd_value = 0;
+ write_sequnlock(&dc->dc_lock);
+ }
+ preempt_enable();
+}
+
+#endif /* _DCOUNTER_H_ */
+
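
Outside of ext2 the counter API boils down to three calls; a minimal usage
sketch (free_widgets_dc and the widget_* names are made up for illustration):

	#include <linux/dcounter.h>

	static struct dcounter free_widgets_dc;

	static void widgets_init(int nr_free)
	{
		/* base value plus a floor that dcounter_value() never
		 * reports below (ext2 uses 0, or 1 for the dirs counter) */
		dcounter_init(&free_widgets_dc, nr_free, 0);
	}

	static void widget_alloc_one(void)
	{
		/* touches only this CPU's diff; folds into dc_base under
		 * the seqlock once the diff exceeds DCOUNTER_MAX_DIFF */
		dcounter_add(&free_widgets_dc, -1);
	}

	static void widget_free_one(void)
	{
		dcounter_add(&free_widgets_dc, 1);
	}

	static int widgets_free(void)
	{
		/* base plus all per-CPU diffs, read under the seqlock;
		 * concurrent updates can still make it slightly stale */
		return dcounter_value(&free_widgets_dc);
	}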
diff -uNr linux-2.5.64/include/linux/ext2_fs_sb.h linux-2.5.64-ciba/include/linux/ext2_fs_sb.h
--- linux-2.5.64/include/linux/ext2_fs_sb.h Mon Nov 11 06:28:30 2002
+++ linux-2.5.64-ciba/include/linux/ext2_fs_sb.h Mon Mar 17 13:26:05 2003
@@ -16,6 +16,15 @@
 #ifndef _LINUX_EXT2_FS_SB
 #define _LINUX_EXT2_FS_SB
 
+#include <linux/dcounter.h>
+
+struct ext2_bg_info {
+ u8 debts;
+ spinlock_t balloc_lock;
+ spinlock_t ialloc_lock;
+ unsigned int reserved;
+} ____cacheline_aligned_in_smp;
+
 /*
  * second extended-fs super-block data in memory
  */
@@ -44,7 +53,10 @@
         int s_first_ino;
         u32 s_next_generation;
         unsigned long s_dir_count;
- u8 *s_debts;
+ struct ext2_bg_info *s_bgi;
+ struct dcounter free_blocks_dc;
+ struct dcounter free_inodes_dc;
+ struct dcounter dirs_dc;
 };
 
 #endif /* _LINUX_EXT2_FS_SB */
