pre-patch 2.1.45 inode patches

Bill Hawes (whawes@star.net)
Fri, 11 Jul 1997 12:49:59 -0400


This is a multi-part message in MIME format.
--------------EC1D2B3F5E12059EDB7C54BB
Content-Type: text/plain; charset=us-ascii
Content-Transfer-Encoding: 7bit

I've added some new things to the inode code -- device-specific syncing,
a "show_inodes" function (a la "show_buffers") hooked into shift-scroll
lock, and a quick futility test for try_to_free_inodes.

Everything seems to run cleanly now, and if you've got enough memory to
hold all the inodes you ever reference, 2.1.45 is the kernel for you :-)

-Bill
--------------EC1D2B3F5E12059EDB7C54BB
Content-Type: text/plain; charset=us-ascii; name="inode_pre45-patch"
Content-Transfer-Encoding: 7bit
Content-Disposition: inline; filename="inode_pre45-patch"

--- fs/inode.c.old Wed Jul 9 07:30:36 1997
+++ fs/inode.c Fri Jul 11 12:26:14 1997
@@ -20,6 +20,8 @@
* Famous last words.
*/

+#define INODE_PARANOIA 1
+
/*
* Inode lookup is no longer as critical as it used to be:
* most of the lookups are going to be through the dcache.
@@ -56,8 +58,9 @@
struct {
int nr_inodes;
int nr_free_inodes;
- int dummy[10];
-} inodes_stat;
+ int nr_may_free;
+ int dummy[9];
+} inodes_stat = {0, 0, 0};

int max_inodes = NR_INODE;

@@ -108,7 +111,6 @@
sema_init(&inode->i_sem, 1);
}

-
/*
* Look out! This returns with the inode lock held if
* it got an inode..
@@ -131,25 +133,43 @@
size -= sizeof(struct inode);
} while (size >= 0);
init_once(inode);
+ inodes_stat.nr_inodes += PAGE_SIZE / sizeof(struct inode);
}
return inode;
}

+/*
+ * This is called with I_LOCK set.
+ */
static inline void write_inode(struct inode *inode)
{
- if (inode->i_sb && inode->i_sb->s_op && inode->i_sb->s_op->write_inode)
+ if (inode->i_sb && inode->i_sb->s_op && inode->i_sb->s_op->write_inode)
inode->i_sb->s_op->write_inode(inode);
}

-static inline void sync_list(struct list_head *head, struct list_head *clean)
+static inline void sync_list(struct list_head *head, struct list_head *clean,
+ kdev_t dev)
{
- struct list_head * tmp;
+ struct list_head * tmp, * sentinel = NULL;

- while ((tmp = head->prev) != head) {
+ while ((tmp = head->prev) != head && tmp != sentinel) {
struct inode *inode = list_entry(tmp, struct inode, i_list);
+
list_del(tmp);

/*
+ * If we're syncing one device and this inode isn't
+ * on that device, move it to the front of the list
+ * and set the sentinel so we know when we're done.
+ */
+ if (dev && inode->i_dev != dev) {
+ if (sentinel == NULL)
+ sentinel = tmp;
+ list_add(tmp, head);
+ continue;
+ }
+
+ /*
* If the inode is locked, it's already being written out.
* We have to wait for it, though.
*/
@@ -165,6 +185,7 @@
write_inode(inode);
unlock_inode(inode);
}
+ sentinel = NULL;
spin_lock(&inode_lock);
}
}
@@ -177,27 +198,48 @@
void sync_inodes(kdev_t dev)
{
spin_lock(&inode_lock);
- sync_list(&inode_dirty, &inode_in_use);
+ sync_list(&inode_dirty, &inode_in_use, dev);
spin_unlock(&inode_lock);
}

/*
+ * Non-blocking version of clear_inode. Caller must hold the inode lock,
+ * and must verify that the inode is not locked and that no disk quota
+ * exists prior to calling. (Disk quota is released when the use count
+ * goes from 1 to 0.)
+ */
+static void _clear_inode(struct inode *inode)
+{
+#ifdef INODE_PARANOIA
+if (test_bit(I_LOCK, &inode->i_state))
+printk("_clear_inode: inode %ld locked!\n", inode->i_ino);
+if (IS_WRITABLE(inode))
+printk("_clear_inode: inode %ld disk quota not released!\n", inode->i_ino);
+#endif
+ /*
+ * This must be non-blocking to be completely safe.
+ */
+ truncate_inode_pages(inode, 0);
+
+ inode->i_state = 0;
+ list_del(&inode->i_hash);
+ INIT_LIST_HEAD(&inode->i_hash);
+ list_del(&inode->i_list);
+ list_add(&inode->i_list, &inode_unused);
+}
+
+/*
* This is called by the filesystem to tell us
* that the inode is no longer useful. We just
- * terminate it with extreme predjudice.
+ * terminate it with extreme prejudice.
*/
void clear_inode(struct inode *inode)
{
- truncate_inode_pages(inode, 0);
- wait_on_inode(inode);
- if (IS_WRITABLE(inode) && inode->i_sb && inode->i_sb->dq_op)
- inode->i_sb->dq_op->drop(inode);
+if (atomic_read(&(inode)->i_count) != 1)
+printk("clear_inode: count=%d\n", atomic_read(&(inode)->i_count));

spin_lock(&inode_lock);
- inode->i_state = 0;
- list_del(&inode->i_hash);
- list_del(&inode->i_list);
- list_add(&inode->i_list, &inode_unused);
+ _clear_inode(inode);
spin_unlock(&inode_lock);
}

@@ -206,9 +248,10 @@
((inode)->i_nrpages == 0) && \
(!test_bit(I_LOCK, &(inode)->i_state)))

-static void invalidate_list(struct list_head *head, kdev_t dev)
+static int invalidate_list(struct list_head *head, kdev_t dev, int check)
{
struct list_head *next;
+ int busy = 0;

next = head->next;
for (;;) {
@@ -221,70 +264,113 @@
inode = list_entry(tmp, struct inode, i_list);
if (inode->i_dev != dev)
continue;
- if (!CAN_UNUSE(inode))
- continue;
- list_del(&inode->i_hash);
- list_del(&inode->i_list);
- list_add(&inode->i_list, &inode_unused);
+ if (atomic_read(&(inode)->i_count) == 0 &&
+ !test_bit(I_DIRTY, &(inode)->i_state) &&
+ !test_bit(I_LOCK , &(inode)->i_state))
+ _clear_inode(inode);
+ else {
+ busy = 1;
+ if (check)
+ break;
+ }
}
+ return busy;
}

void invalidate_inodes(kdev_t dev)
{
spin_lock(&inode_lock);
- invalidate_list(&inode_in_use, dev);
- invalidate_list(&inode_dirty, dev);
+ invalidate_list(&inode_in_use, dev, 0);
+ invalidate_list(&inode_dirty , dev, 0);
spin_unlock(&inode_lock);
}

/*
- * This is called with the inode lock held. It just looks at the last
- * inode on the in-use list, and if the inode is trivially freeable
- * we just move it to the unused list.
+ * This is called with the inode lock held. It checks for potentially
+ * freeable inodes and uses _clear_inode to free them.
*
- * Otherwise we just move the inode to be the first inode and expect to
- * get back to the problem later..
+ * If the first pass fails, we try syncing and check again.
*/
static void try_to_free_inodes(void)
{
- struct list_head * tmp;
+ struct list_head *tmp, *prev;
struct list_head *head = &inode_in_use;
+ int passes = 2, goal = 10, freed = 0;

- tmp = head->prev;
- if (tmp != head) {
- struct inode * inode;
+ /*
+ * nr_may_free is an (over)estimate of the number of unused inodes,
+ * based on zero use counts as seen by iput(). This is useful as
+ * a futility avoidance measure.
+ */
+ if (!inodes_stat.nr_may_free)
+ return;
+ /*
+ * Adjust our goal if there aren't very many inodes.
+ */
+ if (goal > inodes_stat.nr_may_free)
+ goal = inodes_stat.nr_may_free;
+ if (goal > (inodes_stat.nr_inodes >> 5))
+ goal = inodes_stat.nr_inodes >> 5;
+
+ while (passes--) {
+ for (tmp = head->prev; tmp != head; tmp = prev) {
+ struct inode * inode;
+
+ prev = tmp->prev;
+ inode = list_entry(tmp, struct inode, i_list);
+ if (atomic_read(&(inode)->i_count) != 0)
+ continue;
+ if (test_bit(I_LOCK, &(inode)->i_state))
+ continue;
+ /*
+ * Inode is potentially freeable ... should select
+ * least cost inodes though.
+ */
+ _clear_inode(inode);
+ inodes_stat.nr_may_free--;
+ freed++;
+ if (freed >= goal)
+ return;
+ }

- list_del(tmp);
- inode = list_entry(tmp, struct inode, i_list);
- if (CAN_UNUSE(inode)) {
- list_del(&inode->i_hash);
- head = &inode_unused;
+ if (freed) {
+printk("try_to_free_inodes: inodes=%d got %d\n", inodes_stat.nr_inodes, freed);
+ return;
+ }
+
+ /*
+ * First pass failed ... maybe syncing will help.
+ */
+ if (inode_dirty.next != &inode_dirty) {
+ sync_list(&inode_dirty, &inode_in_use, 0);
+ } else {
+ /*
+ * None available ... reset the "maybe free" counter.
+ */
+ inodes_stat.nr_may_free = 0;
+ break;
}
- list_add(tmp, head);
}
}
-

+/*
+ * Called with the inode lock held.
+ */
static struct inode * find_inode(struct super_block * sb, unsigned long ino, struct list_head *head)
{
struct list_head *tmp;
struct inode * inode;

- tmp = head;
- for (;;) {
- tmp = tmp->next;
- inode = NULL;
- if (tmp == head)
- break;
+ for (tmp = head->next; tmp != head; tmp = tmp->next) {
inode = list_entry(tmp, struct inode, i_hash);
if (inode->i_sb != sb)
continue;
if (inode->i_ino != ino)
continue;
atomic_inc(&inode->i_count);
- break;
+ return inode;
}
- return inode;
+ return NULL;
}

/*
@@ -294,13 +380,15 @@
* i_sb, i_ino, i_count, i_state and the lists have
* been initialized elsewhere..
*/
-void clean_inode(struct inode *inode)
+static void clean_inode(struct inode *inode)
{
memset(&inode->u, 0, sizeof(inode->u));
inode->i_pipe = 0;
inode->i_sock = 0;
+ inode->i_mode = 0;
inode->i_op = NULL;
inode->i_nlink = 1;
+ inode->i_writecount = 0;
memset(&inode->i_dquot, 0, sizeof(inode->i_dquot));
sema_init(&inode->i_sem, 1);
}
@@ -315,66 +403,85 @@
unlock_inode(inode);
}

+/*
+ * N.B. This may now return NULL -- check for failure!
+ */
struct inode * get_empty_inode(void)
{
static unsigned long last_ino = 0;
struct inode * inode;
struct list_head * tmp = inode_unused.next;

+ spin_lock(&inode_lock);
if (tmp != &inode_unused) {
+get_unused:
list_del(tmp);
inode = list_entry(tmp, struct inode, i_list);
+#ifdef INODE_PARANOIA
+if (atomic_read(&(inode)->i_count))
+printk("get_empty_inode: unused count=%d\n", atomic_read(&(inode)->i_count));
+#endif
+
add_new_inode:
INIT_LIST_HEAD(&inode->i_list);
INIT_LIST_HEAD(&inode->i_hash);
inode->i_sb = NULL;
+ inode->i_dev = 0;
inode->i_ino = ++last_ino;
- atomic_set(&inode->i_count, 1);
+ inode->i_flags = 0;
+ atomic_inc(&inode->i_count);
inode->i_state = 0;
+ spin_unlock(&inode_lock);
clean_inode(inode);
return inode;
}

/*
- * Warning: if this succeeded, we will now
- * return with the inode lock, and we need to
- * unlock it.
+ * No unused inodes ... try to free some.
*/
+ try_to_free_inodes();
+ tmp = inode_unused.next;
+ if (tmp != &inode_unused)
+ goto get_unused;
+
+ /*
+ * Unlock to try to allocate more inodes.
+ */
+ spin_unlock(&inode_lock);
inode = grow_inodes();
- if (inode) {
- spin_unlock(&inode_lock);
+ if (inode)
goto add_new_inode;
- }
return inode;
}

struct inode * get_pipe_inode(void)
{
extern struct inode_operations pipe_inode_operations;
- struct inode *inode = get_empty_inode();
+ struct inode *inode;
+ unsigned long page = __get_free_page(GFP_USER);

- if (inode) {
- unsigned long page = __get_free_page(GFP_USER);
+ if (!page)
+ return NULL;

- if (!page) {
- iput(inode);
- inode = NULL;
- } else {
- PIPE_BASE(*inode) = (char *) page;
- inode->i_op = &pipe_inode_operations;
- atomic_set(&inode->i_count, 1);
- PIPE_WAIT(*inode) = NULL;
- PIPE_START(*inode) = PIPE_LEN(*inode) = 0;
- PIPE_RD_OPENERS(*inode) = PIPE_WR_OPENERS(*inode) = 0;
- PIPE_READERS(*inode) = PIPE_WRITERS(*inode) = 1;
- PIPE_LOCK(*inode) = 0;
- inode->i_pipe = 1;
- inode->i_mode |= S_IFIFO | S_IRUSR | S_IWUSR;
- inode->i_uid = current->fsuid;
- inode->i_gid = current->fsgid;
- inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME;
- inode->i_blksize = PAGE_SIZE;
- }
+ inode = get_empty_inode();
+ if (inode) {
+ PIPE_BASE(*inode) = (char *) page;
+ inode->i_op = &pipe_inode_operations;
+ atomic_inc(&inode->i_count); /* two references */
+ PIPE_WAIT(*inode) = NULL;
+ PIPE_START(*inode) = PIPE_LEN(*inode) = 0;
+ PIPE_RD_OPENERS(*inode) = PIPE_WR_OPENERS(*inode) = 0;
+ PIPE_READERS(*inode) = PIPE_WRITERS(*inode) = 1;
+ PIPE_LOCK(*inode) = 0;
+ inode->i_pipe = 1;
+ inode->i_mode |= S_IFIFO | S_IRUSR | S_IWUSR;
+ inode->i_uid = current->fsuid;
+ inode->i_gid = current->fsgid;
+ inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME;
+ inode->i_blksize = PAGE_SIZE;
+ }
+ else {
+ free_page(page);
}
return inode;
}
@@ -388,8 +495,14 @@
struct list_head * tmp = inode_unused.next;

if (tmp != &inode_unused) {
+get_unused:
list_del(tmp);
inode = list_entry(tmp, struct inode, i_list);
+#ifdef INODE_PARANOIA
+if (atomic_read(&(inode)->i_count))
+printk("get_new_inode: unused count=%d\n", atomic_read(&(inode)->i_count));
+#endif
+
add_new_inode:
list_add(&inode->i_list, &inode_in_use);
list_add(&inode->i_hash, head);
@@ -397,7 +510,7 @@
inode->i_dev = sb->s_dev;
inode->i_ino = ino;
inode->i_flags = sb->s_flags;
- atomic_set(&inode->i_count, 1);
+ atomic_inc(&inode->i_count);
inode->i_state = 1 << I_LOCK;
spin_unlock(&inode_lock);
clean_inode(inode);
@@ -406,6 +519,14 @@
}

/*
+ * No unused inodes ... try to free some.
+ */
+ try_to_free_inodes();
+ tmp = inode_unused.next;
+ if (tmp != &inode_unused)
+ goto get_unused;
+
+ /*
* Uhhuh.. We need to expand. Unlock for the allocation,
* but note that "grow_inodes()" will return with the
* lock held again if the allocation succeeded.
@@ -440,7 +561,6 @@
spin_lock(&inode_lock);
inode = find_inode(sb, ino, head);
if (!inode) {
- try_to_free_inodes();
return get_new_inode(sb, ino, head);
}
spin_unlock(&inode_lock);
@@ -451,28 +571,70 @@
void insert_inode_hash(struct inode *inode)
{
struct list_head *head = inode_hashtable + hash(inode->i_sb, inode->i_ino);
+ spin_lock(&inode_lock);
list_add(&inode->i_hash, head);
+ spin_unlock(&inode_lock);
}

void iput(struct inode *inode)
{
- if (inode) {
- if (inode->i_pipe)
- wake_up_interruptible(&PIPE_WAIT(*inode));
+ if (!inode)
+ return;
+
+ if (inode->i_pipe)
+ wake_up_interruptible(&PIPE_WAIT(*inode));

- /*
- * Last user dropping the inode?
- */
- if (atomic_read(&inode->i_count) == 1) {
- void (*put)(struct inode *);
+ /*
+ * Last user dropping the inode?
+ */
+ while (atomic_read(&inode->i_count) == 1) {
+ if (test_bit(I_LOCK, &inode->i_state))
+ __wait_on_inode(inode);
+ else if (test_and_clear_bit(I_DIRTY, &inode->i_state)) {
+ set_bit(I_LOCK, &inode->i_state);
+ write_inode(inode);
+ unlock_inode(inode);
+ }
+ else if (IS_WRITABLE(inode) && inode->i_sb &&
+ inode->i_sb->dq_op) {
+ set_bit(I_LOCK, &inode->i_state);
+ inode->i_sb->dq_op->drop(inode);
+ unlock_inode(inode);
+ }
+ else {
+ /*
+ * The preceding operations ensure that the
+ * put_inode() routine won't block in clear_inode.
+ */
if (inode->i_sb && inode->i_sb->s_op) {
+ void (*put)(struct inode *);
put = inode->i_sb->s_op->put_inode;
if (put)
put(inode);
}
+
+ /*
+ * Pipe inodes currently aren't in the lists,
+ * so we have to handle them specially.
+ */
+ if (inode->i_pipe) {
+ free_page((unsigned long)PIPE_BASE(*inode));
+ PIPE_BASE(*inode) = NULL;
+ clear_inode(inode);
+ }
+ break;
}
- atomic_dec(&inode->i_count);
}
+ atomic_dec(&inode->i_count);
+ if (!atomic_read(&inode->i_count)) {
+ inodes_stat.nr_may_free++;
+ }
+
+#ifdef INODE_PARANOIA
+if (atomic_read(&inode->i_count) < 0)
+printk("iput: open count wrapped dev=%d inode=%ld count=%d\n",
+inode->i_dev, inode->i_ino, atomic_read(&inode->i_count));
+#endif
}

int bmap(struct inode * inode, int block)
@@ -499,20 +661,116 @@
}

/*
- * FIXME! These need to go through the in-use inodes to
- * check whether we can mount/umount/remount.
+ * Check whether we can mount.
*/
int fs_may_mount(kdev_t dev)
{
- return 1;
+ int busy;
+
+ spin_lock(&inode_lock);
+ busy = invalidate_list(&inode_dirty , dev, 1) ||
+ invalidate_list(&inode_in_use, dev, 1);
+ spin_unlock(&inode_lock);
+ return !busy;
}

+/*
+ * Check whether we can unmount.
+ */
int fs_may_umount(kdev_t dev, struct dentry * root)
{
- return 0;
+ struct list_head * tmp;
+ struct list_head *head = &inode_in_use;
+ int busy = 0;
+
+ /*
+ * Sync and then invalidate all inodes for this device.
+ */
+ spin_lock(&inode_lock);
+ sync_list(&inode_dirty, &inode_in_use, dev);
+ invalidate_list(&inode_in_use, dev, 0);
+
+ /*
+ * Check the list to see if anything is still in use ...
+ */
+ for (tmp = head->next; tmp != head; tmp = tmp->next) {
+ struct inode * inode;
+
+ inode = list_entry(tmp, struct inode, i_list);
+ if (inode->i_dev != dev)
+ continue;
+ if (!atomic_read(&inode->i_count))
+ continue;
+ if (inode != root->d_inode) {
+ busy = 1;
+ break;
+ }
+ }
+ spin_unlock(&inode_lock);
+printk("fs_may_umount: device %d busy=%d\n", dev, busy);
+ return !busy;
}

+/*
+ * Check that no files on this device are currently open for writing.
+ */
int fs_may_remount_ro(kdev_t dev)
{
- return 0;
+ struct file * file;
+
+ for (file = inuse_filps; file; file = file->f_next) {
+ if (!file->f_inode || file->f_inode->i_dev != dev)
+ continue;
+ if (S_ISREG(file->f_inode->i_mode) && (file->f_mode & 2))
+ return 0;
+ }
+ return 1;
+}
+
+/*
+ * Debugging aids
+ */
+void show_inodes(void)
+{
+ struct list_head *head;
+ int nlist, accounted = 0;
+ static char *list_types[3] = {"CLEAN ","DIRTY ", "UNUSED"};
+
+ printk("Inodes Allocated: %d\n", inodes_stat.nr_inodes);
+ for (nlist = 0, head = &inode_in_use; nlist < 3; nlist++, head++) {
+ struct list_head *tmp;
+ struct inode *inode;
+ int found, used, lastused, hashed, locked, dirty;
+ int count, pipe, negative;
+
+ found = used = lastused = hashed = locked = dirty = 0;
+ pipe = negative = 0;
+ for (tmp = head->next; tmp != head; tmp = tmp->next) {
+ found++;
+
+ inode = list_entry(tmp, struct inode, i_list);
+ if ((count = atomic_read(&inode->i_count)) != 0)
+ used++, lastused = found;
+ if (inode->i_hash.next &&
+ inode->i_hash.next != &inode->i_hash)
+ hashed++;
+ if (test_bit(I_LOCK, &inode->i_state))
+ locked++;
+ if (test_bit(I_DIRTY, &inode->i_state))
+ dirty++;
+ /*
+ * Check for problems ... these shouldn't happen.
+ */
+ if (inode->i_pipe)
+ pipe++;
+ if (count < 0)
+ negative++;
+ }
+ printk("%s: %d inodes, %d used (last=%d), %d hashed, "
+ "%d locked, %d dirty, odd: %d %d\n",
+ list_types[nlist], found, used, lastused, hashed,
+ locked, dirty, pipe, negative);
+ accounted += found;
+ }
+ printk("Inodes Accounted: %d\n", accounted);
}
--- fs/super.c.old Wed Jul 9 07:18:27 1997
+++ fs/super.c Thu Jul 10 21:58:25 1997
@@ -465,7 +465,7 @@
}
if (!(sb = get_super(dev)))
return;
- if (sb->s_root != sb->s_root->d_mounts) {
+ if (sb->s_root && sb->s_root != sb->s_root->d_mounts) {
printk("VFS: Mounted device %s - tssk, tssk\n",
kdevname(dev));
return;
--- include/linux/fs.h.old Fri Jul 11 11:27:37 1997
+++ include/linux/fs.h Fri Jul 11 11:48:05 1997
@@ -792,6 +792,7 @@
extern kdev_t ROOT_DEV;

extern void show_buffers(void);
+extern void show_inodes(void); /* fs/inode.c debugging aid */
extern void mount_root(void);

#ifdef CONFIG_BLK_DEV_INITRD
--- include/linux/list.h.old Wed Jul 9 07:30:37 1997
+++ include/linux/list.h Fri Jul 11 11:48:01 1997
@@ -27,10 +27,19 @@
static inline void list_del(struct list_head *entry)
{
struct list_head *next, *prev;
+#ifdef INODE_PARANOIA
+if (!entry->next) {
+printk("list_del: already removed! (file %s line %d)\n", __FILE__, __LINE__);
+return; }
+#endif
next = entry->next;
prev = entry->prev;
next->prev = prev;
prev->next = next;
+#ifdef INODE_PARANOIA
+entry->next = NULL;
+entry->prev = NULL;
+#endif
}

#define list_entry(ptr, type, member) \
--- arch/i386/mm/init.c.old Mon May 12 13:35:38 1997
+++ arch/i386/mm/init.c Fri Jul 11 07:35:44 1997
@@ -92,6 +92,7 @@
printk("%d reserved pages\n",reserved);
printk("%d pages shared\n",shared);
show_buffers();
+ show_inodes();
#ifdef CONFIG_NET
show_net_buffers();
#endif

--------------EC1D2B3F5E12059EDB7C54BB--