pre-patch-2.1.45

Bill Hawes (whawes@star.net)
Thu, 10 Jul 1997 20:46:44 -0400


This is a multi-part message in MIME format.
--------------94B58FE057C27FD583B5D71B
Content-Type: text/plain; charset=us-ascii
Content-Transfer-Encoding: 7bit

> Linus Torvalds <torvalds@transmeta.com> writes:
> >
> > I haven't seen any corruption - the only problem I see these days is the
> > memory leak due to not freeing dentries (I'm working on this), and the
> > related dirty shutdown because we don't know that we can unmount.

From what I could see it looked like the memory leak was due to pipe
inodes not being freed -- they aren't in the list (except accidentally)
and don't have a put_inode() call, so I couldn't see any way they were
being freed. I added code to release the PIPE_BASE page and call
clear_inode.

The dentry layer seems to be holding the inode count above one, so they
don't get released. If I run a compile, the count of inodes goes up for
every file, but if I rerun the same compile, no new inodes are
allocated.

I've attached a snapshot of my changes -- try_to_free_inode now combs
the list looking for free inodes, and may_mount/umount seem to work,
except there's some other problem in the kernel that causes an oops when
doing the umount. Haven't tracked it down yet.
--------------94B58FE057C27FD583B5D71B
Content-Type: text/plain; charset=us-ascii; name="inode_45-patch"
Content-Transfer-Encoding: 7bit
Content-Disposition: inline; filename="inode_45-patch"

--- fs/inode.c.old Wed Jul 9 07:30:36 1997
+++ fs/inode.c Thu Jul 10 20:21:46 1997
@@ -57,7 +57,7 @@
int nr_inodes;
int nr_free_inodes;
int dummy[10];
-} inodes_stat;
+} inodes_stat = {0, 0};

int max_inodes = NR_INODE;

@@ -131,13 +131,17 @@
size -= sizeof(struct inode);
} while (size >= 0);
init_once(inode);
+ inodes_stat.nr_inodes += PAGE_SIZE / sizeof(struct inode);
}
return inode;
}

+/*
+ * This is called with I_LOCK set.
+ */
static inline void write_inode(struct inode *inode)
{
- if (inode->i_sb && inode->i_sb->s_op && inode->i_sb->s_op->write_inode)
+ if (inode->i_sb && inode->i_sb->s_op && inode->i_sb->s_op->write_inode)
inode->i_sb->s_op->write_inode(inode);
}

@@ -182,22 +186,41 @@
}

/*
- * This is called by the filesystem to tell us
- * that the inode is no longer useful. We just
- * terminate it with extreme predjudice.
+ * Non-blocking version of clear_inode. Caller must hold the inode lock,
+ * and must verify that the inode is not locked and that no disk quota
+ * exists prior to calling. (Disk quota is released when the use count
+ * goes from 1 to 0.)
*/
-void clear_inode(struct inode *inode)
+static void _clear_inode(struct inode *inode)
{
+if (test_bit(I_LOCK, &inode->i_state))
+printk("_clear_inode: inode %ld locked!\n", inode->i_ino);
+if (IS_WRITABLE(inode))
+printk("_clear_inode: inode %ld disk quota not released!\n", inode->i_ino);
+
+ /*
+ * This must be non-blocking to be completely safe.
+ */
truncate_inode_pages(inode, 0);
- wait_on_inode(inode);
- if (IS_WRITABLE(inode) && inode->i_sb && inode->i_sb->dq_op)
- inode->i_sb->dq_op->drop(inode);

- spin_lock(&inode_lock);
inode->i_state = 0;
list_del(&inode->i_hash);
list_del(&inode->i_list);
list_add(&inode->i_list, &inode_unused);
+}
+
+/*
+ * This is called by the filesystem to tell us
+ * that the inode is no longer useful. We just
+ * terminate it with extreme prejudice.
+ */
+void clear_inode(struct inode *inode)
+{
+if (atomic_read(&(inode)->i_count) != 1)
+printk("clear_inode: count=%d\n", atomic_read(&(inode)->i_count));
+
+ spin_lock(&inode_lock);
+ _clear_inode(inode);
spin_unlock(&inode_lock);
}

@@ -206,9 +229,10 @@
((inode)->i_nrpages == 0) && \
(!test_bit(I_LOCK, &(inode)->i_state)))

-static void invalidate_list(struct list_head *head, kdev_t dev)
+static int invalidate_list(struct list_head *head, kdev_t dev, int check)
{
struct list_head *next;
+ int busy = 0;

next = head->next;
for (;;) {
@@ -221,70 +245,100 @@
inode = list_entry(tmp, struct inode, i_list);
if (inode->i_dev != dev)
continue;
- if (!CAN_UNUSE(inode))
- continue;
- list_del(&inode->i_hash);
- list_del(&inode->i_list);
- list_add(&inode->i_list, &inode_unused);
+ if (atomic_read(&(inode)->i_count) == 0 &&
+ !test_bit(I_DIRTY, &(inode)->i_state) &&
+ !test_bit(I_LOCK , &(inode)->i_state))
+ _clear_inode(inode);
+ else {
+ busy = 1;
+ if (check)
+ break;
+ }
}
+ return busy;
}

void invalidate_inodes(kdev_t dev)
{
spin_lock(&inode_lock);
- invalidate_list(&inode_in_use, dev);
- invalidate_list(&inode_dirty, dev);
+ invalidate_list(&inode_in_use, dev, 0);
+ invalidate_list(&inode_dirty , dev, 0);
spin_unlock(&inode_lock);
}

/*
- * This is called with the inode lock held. It just looks at the last
- * inode on the in-use list, and if the inode is trivially freeable
- * we just move it to the unused list.
+ * This is called with the inode lock held. It checks for potentially
+ * freeable inodes and uses _clear_inodes to free them.
*
- * Otherwise we just move the inode to be the first inode and expect to
- * get back to the problem later..
+ * If the first pass fails, we try syncing and check again.
*/
static void try_to_free_inodes(void)
{
- struct list_head * tmp;
+ struct list_head *tmp, *prev;
struct list_head *head = &inode_in_use;
+ int passes = 2, goal = 10, freed = 0;

- tmp = head->prev;
- if (tmp != head) {
- struct inode * inode;
+ /*
+ * Adjust our goal if there aren't very many inodes.
+ */
+ if (goal > (inodes_stat.nr_inodes >> 5))
+ goal = inodes_stat.nr_inodes >> 5;

- list_del(tmp);
- inode = list_entry(tmp, struct inode, i_list);
- if (CAN_UNUSE(inode)) {
- list_del(&inode->i_hash);
- head = &inode_unused;
+ while (passes--) {
+ for (tmp = head->prev; tmp != head; tmp = prev) {
+ struct inode * inode;
+
+ prev = tmp->prev;
+ inode = list_entry(tmp, struct inode, i_list);
+ if (atomic_read(&(inode)->i_count) != 0)
+ continue;
+ if (test_bit(I_LOCK, &(inode)->i_state))
+ continue;
+ /*
+ * Inode is potentially freeable ... should select
+ * least cost inodes though.
+ */
+ _clear_inode(inode);
+ freed++;
+ if (freed >= goal)
+ return;
}
- list_add(tmp, head);
+
+ if (freed) {
+printk("try_to_free_inodes: inodes=%d got %d\n", inodes_stat.nr_inodes, freed);
+ return;
+ }
+
+ /*
+ * First pass failed ... try syncing.
+ */
+ if (inode_dirty.next != &inode_dirty) {
+printk("try_to_free_inodes: got that syncing feeling ... %d\n",
+inodes_stat.nr_inodes);
+ sync_list(&inode_dirty, &inode_in_use);
+ } else
+ break;
}
}
-

+/*
+ * Called with the inode lock held.
+ */
static struct inode * find_inode(struct super_block * sb, unsigned long ino, struct list_head *head)
{
struct list_head *tmp;
struct inode * inode;

- tmp = head;
- for (;;) {
- tmp = tmp->next;
- inode = NULL;
- if (tmp == head)
- break;
+ for (tmp = head->next; tmp != head; tmp = tmp->next) {
inode = list_entry(tmp, struct inode, i_hash);
if (inode->i_sb != sb)
continue;
if (inode->i_ino != ino)
continue;
atomic_inc(&inode->i_count);
- break;
+ return inode;
}
- return inode;
+ return NULL;
}

/*
@@ -294,13 +348,15 @@
* i_sb, i_ino, i_count, i_state and the lists have
* been initialized elsewhere..
*/
-void clean_inode(struct inode *inode)
+static void clean_inode(struct inode *inode)
{
memset(&inode->u, 0, sizeof(inode->u));
inode->i_pipe = 0;
inode->i_sock = 0;
+ inode->i_mode = 0;
inode->i_op = NULL;
inode->i_nlink = 1;
+ inode->i_writecount = 0;
memset(&inode->i_dquot, 0, sizeof(inode->i_dquot));
sema_init(&inode->i_sem, 1);
}
@@ -315,66 +371,83 @@
unlock_inode(inode);
}

+/*
+ * N.B. This may now return NULL -- check for failure!
+ */
struct inode * get_empty_inode(void)
{
static unsigned long last_ino = 0;
struct inode * inode;
struct list_head * tmp = inode_unused.next;

+ spin_lock(&inode_lock);
if (tmp != &inode_unused) {
+get_unused:
list_del(tmp);
inode = list_entry(tmp, struct inode, i_list);
+if (atomic_read(&(inode)->i_count))
+printk("get_empty_inode: unused count=%d\n", atomic_read(&(inode)->i_count));
+
add_new_inode:
INIT_LIST_HEAD(&inode->i_list);
INIT_LIST_HEAD(&inode->i_hash);
inode->i_sb = NULL;
+ inode->i_dev = 0;
inode->i_ino = ++last_ino;
- atomic_set(&inode->i_count, 1);
+ inode->i_flags = 0;
+ atomic_inc(&inode->i_count);
inode->i_state = 0;
+ spin_unlock(&inode_lock);
clean_inode(inode);
return inode;
}

/*
- * Warning: if this succeeded, we will now
- * return with the inode lock, and we need to
- * unlock it.
+ * No unused inodes ... try to free some.
+ */
+ try_to_free_inodes();
+ tmp = inode_unused.next;
+ if (tmp != &inode_unused)
+ goto get_unused;
+
+ /*
+ * Unlock to try to allocate more inodes.
*/
+ spin_unlock(&inode_lock);
inode = grow_inodes();
- if (inode) {
- spin_unlock(&inode_lock);
+ if (inode)
goto add_new_inode;
- }
return inode;
}

struct inode * get_pipe_inode(void)
{
extern struct inode_operations pipe_inode_operations;
- struct inode *inode = get_empty_inode();
+ struct inode *inode;
+ unsigned long page = __get_free_page(GFP_USER);

- if (inode) {
- unsigned long page = __get_free_page(GFP_USER);
+ if (!page)
+ return NULL;

- if (!page) {
- iput(inode);
- inode = NULL;
- } else {
- PIPE_BASE(*inode) = (char *) page;
- inode->i_op = &pipe_inode_operations;
- atomic_set(&inode->i_count, 1);
- PIPE_WAIT(*inode) = NULL;
- PIPE_START(*inode) = PIPE_LEN(*inode) = 0;
- PIPE_RD_OPENERS(*inode) = PIPE_WR_OPENERS(*inode) = 0;
- PIPE_READERS(*inode) = PIPE_WRITERS(*inode) = 1;
- PIPE_LOCK(*inode) = 0;
- inode->i_pipe = 1;
- inode->i_mode |= S_IFIFO | S_IRUSR | S_IWUSR;
- inode->i_uid = current->fsuid;
- inode->i_gid = current->fsgid;
- inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME;
- inode->i_blksize = PAGE_SIZE;
- }
+ inode = get_empty_inode();
+ if (inode) {
+ PIPE_BASE(*inode) = (char *) page;
+ inode->i_op = &pipe_inode_operations;
+ atomic_inc(&inode->i_count); /* two references */
+ PIPE_WAIT(*inode) = NULL;
+ PIPE_START(*inode) = PIPE_LEN(*inode) = 0;
+ PIPE_RD_OPENERS(*inode) = PIPE_WR_OPENERS(*inode) = 0;
+ PIPE_READERS(*inode) = PIPE_WRITERS(*inode) = 1;
+ PIPE_LOCK(*inode) = 0;
+ inode->i_pipe = 1;
+ inode->i_mode |= S_IFIFO | S_IRUSR | S_IWUSR;
+ inode->i_uid = current->fsuid;
+ inode->i_gid = current->fsgid;
+ inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME;
+ inode->i_blksize = PAGE_SIZE;
+ }
+ else {
+ free_page(page);
}
return inode;
}
@@ -388,8 +461,12 @@
struct list_head * tmp = inode_unused.next;

if (tmp != &inode_unused) {
+get_unused:
list_del(tmp);
inode = list_entry(tmp, struct inode, i_list);
+if (atomic_read(&(inode)->i_count))
+printk("get_new_inode: unused count=%d\n", atomic_read(&(inode)->i_count));
+
add_new_inode:
list_add(&inode->i_list, &inode_in_use);
list_add(&inode->i_hash, head);
@@ -397,7 +474,7 @@
inode->i_dev = sb->s_dev;
inode->i_ino = ino;
inode->i_flags = sb->s_flags;
- atomic_set(&inode->i_count, 1);
+ atomic_inc(&inode->i_count);
inode->i_state = 1 << I_LOCK;
spin_unlock(&inode_lock);
clean_inode(inode);
@@ -406,6 +483,14 @@
}

/*
+ * No unused inodes ... try to free some.
+ */
+ try_to_free_inodes();
+ tmp = inode_unused.next;
+ if (tmp != &inode_unused)
+ goto get_unused;
+
+ /*
* Uhhuh.. We need to expand. Unlock for the allocation,
* but note that "grow_inodes()" will return with the
* lock held again if the allocation succeeded.
@@ -440,7 +525,6 @@
spin_lock(&inode_lock);
inode = find_inode(sb, ino, head);
if (!inode) {
- try_to_free_inodes();
return get_new_inode(sb, ino, head);
}
spin_unlock(&inode_lock);
@@ -456,23 +540,57 @@

void iput(struct inode *inode)
{
- if (inode) {
- if (inode->i_pipe)
- wake_up_interruptible(&PIPE_WAIT(*inode));
+ if (!inode)
+ return;
+
+ if (inode->i_pipe)
+ wake_up_interruptible(&PIPE_WAIT(*inode));

- /*
- * Last user dropping the inode?
- */
- if (atomic_read(&inode->i_count) == 1) {
- void (*put)(struct inode *);
+ /*
+ * Last user dropping the inode?
+ */
+ while (atomic_read(&inode->i_count) == 1) {
+ if (test_bit(I_LOCK, &inode->i_state))
+ __wait_on_inode(inode);
+ else if (test_and_clear_bit(I_DIRTY, &inode->i_state)) {
+ set_bit(I_LOCK, &inode->i_state);
+ write_inode(inode);
+ unlock_inode(inode);
+ }
+ else if (IS_WRITABLE(inode) && inode->i_sb &&
+ inode->i_sb->dq_op) {
+ set_bit(I_LOCK, &inode->i_state);
+ inode->i_sb->dq_op->drop(inode);
+ unlock_inode(inode);
+ }
+ else {
+ /*
+ * The preceeding operations ensure that the
+ * put_inode() routine won't block in clear_inode.
+ */
if (inode->i_sb && inode->i_sb->s_op) {
+ void (*put)(struct inode *);
put = inode->i_sb->s_op->put_inode;
if (put)
put(inode);
}
+
+ /*
+ * Pipe inodes currently aren't in the lists,
+ * so we have to handle them specially.
+ */
+ if (inode->i_pipe) {
+ free_page((unsigned long)PIPE_BASE(*inode));
+ PIPE_BASE(*inode) = NULL;
+ clear_inode(inode);
+ }
+ break;
}
- atomic_dec(&inode->i_count);
}
+ atomic_dec(&inode->i_count);
+if (atomic_read(&inode->i_count) < 0)
+printk("iput: open count wrapped dev=%d inode=%ld count=%d\n",
+inode->i_dev, inode->i_ino, atomic_read(&inode->i_count));
}

int bmap(struct inode * inode, int block)
@@ -499,20 +617,72 @@
}

/*
- * FIXME! These need to go through the in-use inodes to
- * check whether we can mount/umount/remount.
+ * Check whether we can mount.
*/
int fs_may_mount(kdev_t dev)
{
- return 1;
+ int busy;
+
+ spin_lock(&inode_lock);
+ busy = invalidate_list(&inode_dirty , dev, 1) ||
+ invalidate_list(&inode_in_use, dev, 1);
+ spin_unlock(&inode_lock);
+printk("fs_may_mount: device %d busy=%d\n", dev, busy);
+ return !busy;
}

+/*
+ * Check whether we can unmount. This routine invalidates the device's
+ * inodes to prevent races between returning and eventual unmounting.
+ */
int fs_may_umount(kdev_t dev, struct dentry * root)
{
- return 0;
+ struct list_head * tmp;
+ struct list_head *head = &inode_in_use;
+ int busy = 0;
+
+ /*
+ * Sync and then invalidate all inodes for this device.
+ */
+ sync_inodes(dev);
+
+ spin_lock(&inode_lock);
+ invalidate_list(&inode_in_use, dev, 0);
+
+ /*
+ * Check the list to see if anything is still in use ...
+ */
+ for (tmp = head->next; tmp != head; tmp = tmp->next) {
+ struct inode * inode;
+
+ inode = list_entry(tmp, struct inode, i_list);
+ if (inode->i_dev != dev)
+ continue;
+ if (!atomic_read(&inode->i_count))
+ continue;
+ if (inode != root->d_inode) {
+ busy = 1;
+ break;
+ }
+ }
+ spin_unlock(&inode_lock);
+printk("fs_may_umount: device %d busy=%d\n", dev, busy);
+ return 0; /* !busy; */
}

+/*
+ * Check that no files on this device are currently open for writing.
+ */
int fs_may_remount_ro(kdev_t dev)
{
- return 0;
+ struct file * file;
+printk("fs_may_remount_ro: checking device %d\n", dev);
+
+ for (file = inuse_filps; file; file = file->f_next) {
+ if (!file->f_inode || file->f_inode->i_dev != dev)
+ continue;
+ if (S_ISREG(file->f_inode->i_mode) && (file->f_mode & 2))
+ return 0;
+ }
+ return 1;
}

--------------94B58FE057C27FD583B5D71B--