Re: 2.1.57 /proc problem

Bill Hawes (whawes@star.net)
Mon, 29 Sep 1997 08:25:30 -0400


This is a multi-part message in MIME format.
--------------8D1DF1B16551B36C695B0E31
Content-Type: text/plain; charset=us-ascii
Content-Transfer-Encoding: 7bit

Richard Guenther wrote:
> This is a bug in procfs not deleting the dentry for the file.
> It's been there for a long time now... (You can easy cause this
> with binfmt_misc, too)
> I suppose, this will not be fixed until the VFS has finally moved
> to dentries and procfs has got a 'little' overhaul.

Hi Richard,
I sent Heinz a patch to try out, and if you have a similar problem
perhaps you could test it as well? I've appended the text of my prior
message.

> BTW I reported this bug twice some time ago...

I remember your posting on the binfmt_misc problem, but didn't make the
connection with the dentry caching at the time.

Regards,
Bill

[appended message]
Heinz Mauelshagen wrote:
In 2.1.57 it seemed to work using proc_register and proc_unregister.
>
> But strange results occured after proc_unregiste (module unload):
>
> "ls -l /proc/e*" doesn't show /proc/egg.
> "ls -l /proc/egg" shows it!
> "cat /proc/egg" segfaults :-(

Does your /proc code install a custom inode_operations in the inodes?
I'm pretty sure I know what's going wrong: the dcache is keeping your
inodes around after you've unregistered the module, and the subsequent
call through the i_op vector goes to never-never land.

As luck would have it, I've been working on a patch for /proc in the
past couple of days that should fix this problem (and others). Could
you try the attached patch and see if it helps?

Regards,
Bill
--------------8D1DF1B16551B36C695B0E31
Content-Type: text/plain; charset=us-ascii; name="procfs_57-patch"
Content-Transfer-Encoding: 7bit
Content-Disposition: inline; filename="procfs_57-patch"

--- linux-2.1.57/kernel/fork.c.old Fri Sep 26 08:10:52 1997
+++ linux-2.1.57/kernel/fork.c Sun Sep 28 13:27:14 1997
@@ -208,7 +208,6 @@
struct vm_area_struct * mpnt, *tmp, **pprev;
int retval;

- mm->mmap = mm->mmap_cache = NULL;
flush_cache_mm(current->mm);
pprev = &mm->mmap;
for (mpnt = current->mm->mmap ; mpnt ; mpnt = mpnt->vm_next) {
@@ -254,8 +253,7 @@
if (retval)
goto fail_nomem;
}
- flush_tlb_mm(current->mm);
- return 0;
+ retval = 0;

fail_nomem:
flush_tlb_mm(current->mm);
@@ -276,7 +274,11 @@
mm->count = 1;
mm->def_flags = 0;
mm->mmap_sem = MUTEX;
- mm->pgd = NULL;
+ /*
+ * Install the kernel page directory so
+ * pgd_offset() is always valid.
+ */
+ mm->pgd = swapper_pg_dir;
mm->mmap = mm->mmap_cache = NULL;

/* It has not run yet, so cannot be present in anyone's
@@ -324,10 +326,12 @@
goto free_mm;
retval = dup_mmap(mm);
if (retval)
- goto free_mm;
+ goto free_pt;
return 0;

free_mm:
+ mm->pgd = NULL;
+free_pt:
tsk->mm = NULL;
mmput(mm);
fail_nomem:
--- linux-2.1.57/fs/proc/array.c.old Fri Sep 26 08:10:51 1997
+++ linux-2.1.57/fs/proc/array.c Sun Sep 28 12:01:33 1997
@@ -348,6 +348,14 @@

if (!p || !p->mm || ptr >= TASK_SIZE)
return 0;
+ /*
+ * Note: a new task may (briefly) have mm->pgd == NULL
+ */
+ if (!p->mm->pgd) {
+ printk("get_phys_addr: pid %d has NULL pgd!\n", p->pid);
+ return 0;
+ }
+
page_dir = pgd_offset(p->mm,ptr);
if (pgd_none(*page_dir))
return 0;
@@ -861,6 +869,7 @@
struct vm_area_struct * vma = tsk->mm->mmap;

while (vma) {
+ /* Note: if the task has mmaps, mm->pgd is valid */
pgd_t *pgd = pgd_offset(tsk->mm, vma->vm_start);
int pages = 0, shared = 0, dirty = 0, total = 0;

@@ -917,24 +926,31 @@
#define MAPS_LINE_MAX MAPS_LINE_MAX8


-static long read_maps (int pid, struct file * file,
- char * buf, unsigned long count)
+static long read_maps (int pid, struct file * file, char * buf,
+ unsigned long count)
{
- struct task_struct *p = find_task_by_pid(pid);
- char * destptr;
+ struct task_struct *p;
+ struct vm_area_struct * map, * next;
+ char * destptr = buf, * buffer;
loff_t lineno;
- int column;
- struct vm_area_struct * map;
- int i;
- char * buffer;
+ int column, i;
+ long retval;
+
+ /*
+ * We might sleep getting the page, so get it first.
+ */
+ retval = -ENOMEM;
+ buffer = (char*)__get_free_page(GFP_KERNEL);
+ if (!buffer)
+ goto out;

+ retval = -EINVAL;
+ p = find_task_by_pid(pid);
if (!p)
- return -EINVAL;
+ goto freepage_out;

if (!p->mm || p->mm == &init_mm || count == 0)
- return 0;
-
- buffer = (char*)__get_free_page(GFP_KERNEL);
+ goto getlen_out;

/* decode f_pos */
lineno = file->f_pos >> MAPS_LINE_SHIFT;
@@ -944,9 +960,7 @@
for (map = p->mm->mmap, i = 0; map && (i < lineno); map = map->vm_next, i++)
continue;

- destptr = buf;
-
- for ( ; map ; ) {
+ for ( ; map ; map = next ) {
/* produce the next line */
char *line;
char str[5], *cp = str;
@@ -957,6 +971,10 @@
MAPS_LINE_MAX4 : MAPS_LINE_MAX8;
int len;

+ /*
+ * Get the next vma now (but it won't be used if we sleep).
+ */
+ next = map->vm_next;
flags = map->vm_flags;

*cp++ = flags & VM_READ ? 'r' : '-';
@@ -993,20 +1011,19 @@
if (column >= len) {
column = 0; /* continue with next line at column 0 */
lineno++;
- map = map->vm_next;
- continue;
+ continue; /* we haven't slept */
}

i = len-column;
if (i > count)
i = count;
- copy_to_user(destptr, line+column, i);
- destptr += i; count -= i;
- column += i;
+ copy_to_user(destptr, line+column, i); /* may have slept */
+ destptr += i;
+ count -= i;
+ column += i;
if (column >= len) {
column = 0; /* next time: next line at column 0 */
lineno++;
- map = map->vm_next;
}

/* done? */
@@ -1015,6 +1032,7 @@

/* By writing to user space, we might have slept.
* Stop the loop, to avoid a race condition.
+ * N.B. cloned task might have had mmaps changed ...
*/
if (p != current)
break;
@@ -1023,8 +1041,13 @@
/* encode f_pos */
file->f_pos = (lineno << MAPS_LINE_SHIFT) + column;

+getlen_out:
+ retval = destptr - buf;
+
+freepage_out:
free_page((unsigned long)buffer);
- return destptr-buf;
+out:
+ return retval;
}

#ifdef CONFIG_MODULES
--- linux-2.1.57/fs/proc/root.c.old Wed Sep 10 09:21:27 1997
+++ linux-2.1.57/fs/proc/root.c Fri Sep 26 10:39:01 1997
@@ -173,7 +173,8 @@

int proc_openprom_regdev(struct openpromfs_dev *d)
{
- if (proc_openpromdev_ino == PROC_OPENPROMD_FIRST + PROC_NOPENPROMD) return -1;
+ if (proc_openpromdev_ino == PROC_OPENPROMD_FIRST + PROC_NOPENPROMD)
+ return -1;
d->next = proc_openprom_devices;
d->inode = proc_openpromdev_ino++;
proc_openprom_devices = d;
@@ -218,6 +219,7 @@
(inode, filp, dirent, filldir);
return -EINVAL;
}
+#define OPENPROM_DEFREADDIR proc_openprom_defreaddir

static int
proc_openprom_deflookup(struct inode * dir, struct dentry *dentry)
@@ -229,17 +231,17 @@
(dir, dentry);
return -ENOENT;
}
+#define OPENPROM_DEFLOOKUP proc_openprom_deflookup
+#else
+#define OPENPROM_DEFREADDIR NULL
+#define OPENPROM_DEFLOOKUP NULL
#endif

static struct file_operations proc_openprom_operations = {
NULL, /* lseek - default */
NULL, /* read - bad */
NULL, /* write - bad */
-#if defined(CONFIG_SUN_OPENPROMFS_MODULE) && defined(CONFIG_KERNELD)
- proc_openprom_defreaddir,/* readdir */
-#else
- NULL, /* readdir */
-#endif
+ OPENPROM_DEFREADDIR, /* readdir */
NULL, /* poll - default */
NULL, /* ioctl - default */
NULL, /* mmap */
@@ -251,11 +253,7 @@
struct inode_operations proc_openprom_inode_operations = {
&proc_openprom_operations,/* default net directory file-ops */
NULL, /* create */
-#if defined(CONFIG_SUN_OPENPROMFS_MODULE) && defined(CONFIG_KERNELD)
- proc_openprom_deflookup,/* lookup */
-#else
- NULL, /* lookup */
-#endif
+ OPENPROM_DEFLOOKUP, /* lookup */
NULL, /* link */
NULL, /* unlink */
NULL, /* symlink */
@@ -639,6 +637,26 @@
}

/*
+ * As some entries in /proc are volatile, we want to
+ * get rid of unused dentries. This could be made
+ * smarter: we could keep a "volatile" flag in the
+ * inode to indicate which ones to keep.
+ */
+static void
+proc_delete_dentry(struct dentry * dentry)
+{
+ d_drop(dentry);
+}
+
+static struct dentry_operations proc_dentry_operations =
+{
+ NULL, /* revalidate */
+ NULL, /* d_hash */
+ NULL, /* d_compare */
+ proc_delete_dentry /* d_delete(struct dentry *) */
+};
+
+/*
* Don't create negative dentries here, return -ENOENT by hand
* instead.
*/
@@ -646,12 +664,15 @@
{
struct inode *inode;
struct proc_dir_entry * de;
+ int error;

+ error = -ENOTDIR;
if (!dir || !S_ISDIR(dir->i_mode))
- return -ENOTDIR;
+ goto out;

- de = (struct proc_dir_entry *) dir->u.generic_ip;
+ error = -ENOENT;
inode = NULL;
+ de = (struct proc_dir_entry *) dir->u.generic_ip;
if (de) {
for (de = de->subdir; de ; de = de->next) {
if (!de || !de->low_ino)
@@ -660,18 +681,20 @@
continue;
if (!memcmp(dentry->d_name.name, de->name, de->namelen)) {
int ino = de->low_ino | (dir->i_ino & ~(0xffff));
+ error = -EINVAL;
inode = proc_get_inode(dir->i_sb, ino, de);
- if (!inode)
- return -EINVAL;
break;
}
}
}
- if (!inode)
- return -ENOENT;

- d_add(dentry, inode);
- return 0;
+ if (inode) {
+ dentry->d_op = &proc_dentry_operations;
+ d_add(dentry, inode);
+ error = 0;
+ }
+out:
+ return error;
}

static int proc_root_lookup(struct inode * dir, struct dentry * dentry)
@@ -721,6 +744,8 @@
if (!inode)
return -EINVAL;
}
+
+ dentry->d_op = &proc_dentry_operations;
d_add(dentry, inode);
return 0;
}
@@ -827,3 +852,4 @@
read_unlock(&tasklist_lock);
return 0;
}
+
--- linux-2.1.57/fs/proc/inode.c.old Sat Jul 19 08:17:14 1997
+++ linux-2.1.57/fs/proc/inode.c Fri Sep 26 10:39:01 1997
@@ -20,21 +20,28 @@
static void proc_put_inode(struct inode *inode)
{
#ifdef CONFIG_SUN_OPENPROMFS_MODULE
- if ((inode->i_ino >= PROC_OPENPROM_FIRST)
- && (inode->i_ino < PROC_OPENPROM_FIRST + PROC_NOPENPROM)
- && proc_openprom_use)
+ if ((inode->i_ino >= PROC_OPENPROM_FIRST) &&
+ (inode->i_ino < PROC_OPENPROM_FIRST + PROC_NOPENPROM) &&
+ proc_openprom_use)
(*proc_openprom_use)(inode, 0);
#endif
+ /*
+ * Kill off unused inodes ... VFS will unhash
+ * and delete if we set i_nlink to zero.
+ */
+ if (inode->i_count == 1)
+ inode->i_nlink = 0;
}

/*
- * Does this ever happen?
+ * Note: as of 2.1.56 we don't need a delete_inode routine
+ * if no special cleanup is required.
*/
+#if 0
static void proc_delete_inode(struct inode *inode)
{
- printk("proc_delete_inode()?\n");
- inode->i_size = 0;
}
+#endif

static void proc_put_super(struct super_block *sb)
{
@@ -47,7 +54,7 @@
proc_read_inode,
proc_write_inode,
proc_put_inode,
- proc_delete_inode,
+ NULL, /* delete_inode(struct inode *) */
NULL,
proc_put_super,
NULL,
@@ -85,9 +92,14 @@
return 1;
}

-struct inode * proc_get_inode(struct super_block * s, int ino, struct proc_dir_entry * de)
+struct inode * proc_get_inode(struct super_block * s, int ino,
+ struct proc_dir_entry * de)
{
- struct inode * inode = iget(s, ino);
+ struct inode * inode;
+
+ inode = iget(s, ino);
+ if (!inode)
+ goto out;

#ifdef CONFIG_SUN_OPENPROMFS_MODULE
if ((inode->i_ino >= PROC_OPENPROM_FIRST)
@@ -95,7 +107,9 @@
&& proc_openprom_use)
(*proc_openprom_use)(inode, 1);
#endif
- if (inode && inode->i_sb == s) {
+ /* N.B. How can this test ever fail?? */
+ if (inode->i_sb == s) {
+ /* N.B. could de disappear while we allocated an inode?? */
inode->u.generic_ip = (void *) de;
if (de) {
if (de->mode) {
@@ -126,26 +140,36 @@
}
read_unlock(&tasklist_lock);
}
+out:
return inode;
}

struct super_block *proc_read_super(struct super_block *s,void *data,
int silent)
{
+ struct inode * root_inode;
+
lock_super(s);
s->s_blocksize = 1024;
s->s_blocksize_bits = 10;
s->s_magic = PROC_SUPER_MAGIC;
s->s_op = &proc_sops;
+ root_inode = proc_get_inode(s, PROC_ROOT_INO, &proc_root);
+ if (!root_inode)
+ goto out_no_root;
+ s->s_root = d_alloc_root(root_inode, NULL);
+ if (!s->s_root)
+ goto out_no_root;
+ parse_options(data, &root_inode->i_uid, &root_inode->i_gid);
unlock_super(s);
- s->s_root = d_alloc_root(proc_get_inode(s, PROC_ROOT_INO, &proc_root), NULL);
- if (!s->s_root) {
- s->s_dev = 0;
- printk("get root inode failed\n");
- return NULL;
- }
- parse_options(data, &s->s_root->d_inode->i_uid, &s->s_root->d_inode->i_gid);
return s;
+
+out_no_root:
+ printk("proc_read_super: get root inode failed\n");
+ iput(root_inode);
+ s->s_dev = 0;
+ unlock_super(s);
+ return NULL;
}

int proc_statfs(struct super_block *sb, struct statfs *buf, int bufsiz)

--------------8D1DF1B16551B36C695B0E31--