variable fd array patch for 2.1.90

Bill Hawes (whawes@star.net)
Thu, 26 Mar 1998 14:41:39 -0500


This is a multi-part message in MIME format.
--------------759A7506A5A543E27F19D4A0
Content-Type: text/plain; charset=us-ascii
Content-Transfer-Encoding: 7bit

The attached patch provides for variable-sized fd arrays, automatically
expanding from a small default (32 files) to the full 1024 when needed. This
results in a significant savings of non-pageable kernel memory (about 35 pages
just for the system tasks), and should significantly speed up process forking
when fewer than 32 files are used.

The patch adds an FDSize: entry to the /proc/pid/status output so you can
monitor which tasks are using more files. (As previously discussed on this
list, the bash shell opens fd 255 and therefore requires a full fd array.)

The patch is against 2.1.90, but you'll need the changes in 2.1.91 pre-2 to test
the code. Comments and suggestions welcome ...

Regards,
Bill
--------------759A7506A5A543E27F19D4A0
Content-Type: text/plain; charset=us-ascii; name="fork_files90-patch"
Content-Transfer-Encoding: 7bit
Content-Disposition: inline; filename="fork_files90-patch"

--- linux-2.1.90/include/linux/sched.h.old Sun Mar 22 12:22:56 1998
+++ linux-2.1.90/include/linux/sched.h Tue Mar 24 23:18:21 1998
@@ -119,7 +119,7 @@

asmlinkage void schedule(void);

-
+#define NR_OPEN_DEFAULT 32
/*
* Open file table structure
*/
@@ -129,6 +129,7 @@
struct file ** fd; /* current fd array */
fd_set close_on_exec;
fd_set open_fds;
+ struct file * fd_array[NR_OPEN_DEFAULT];
};

#define INIT_FILES { \
@@ -136,7 +137,8 @@
NR_OPEN, \
&init_fd_array[0], \
{ { 0, } }, \
- { { 0, } } \
+ { { 0, } }, \
+ { NULL, } \
}

struct fs_struct {
@@ -558,6 +560,13 @@
mm->count++;
}
extern void mmput(struct mm_struct *);
+
+/*
+ * Routines for handling the fd arrays
+ */
+extern struct file ** alloc_fd_array(int);
+extern int expand_fd_array(struct files_struct *);
+extern void free_fd_array(struct file **, int);

extern int copy_thread(int, unsigned long, unsigned long, struct task_struct *, struct pt_regs *);
extern void flush_thread(void);
--- linux-2.1.90/kernel/fork.c.old Sun Mar 22 11:30:40 1998
+++ linux-2.1.90/kernel/fork.c Wed Mar 25 18:37:17 1998
@@ -378,11 +378,74 @@
return __copy_fdset(dst->fds_bits, src->fds_bits);
}

+/*
+ * Allocate a zeroed fd array of num entries; returns NULL on failure.
+ */
+struct file ** alloc_fd_array(int num)
+{
+ struct file **new_fds;
+ int size = num * sizeof(struct file *);
+ /* An array of exactly one page is taken as a free page, else kmalloc(). */
+ if (size == PAGE_SIZE)
+ new_fds = (struct file **) __get_free_page(GFP_KERNEL);
+ else
+ new_fds = (struct file **) kmalloc(size, GFP_KERNEL);
+ if (new_fds)
+ memset((void *) new_fds, 0, size);
+ return new_fds;
+}
+
+/*
+ * Grow files->fd to NR_OPEN entries. Returns 0, -EMFILE or -ENOMEM.
+ */
+int expand_fd_array(struct files_struct *files)
+{
+ struct file **new_fds;
+ int error, nfds;
+
+ error = -EMFILE;
+ if (files->max_fds >= NR_OPEN)
+ goto out;
+
+ /* Expand to the max in one step */
+ nfds = NR_OPEN;
+
+ error = -ENOMEM;
+ new_fds = alloc_fd_array(nfds);
+ if (!new_fds)
+ goto out;
+
+ /* Copy the existing array and install the new pointer */
+ if (nfds > files->max_fds) {
+ int i;
+ for (i = files->max_fds; i--; )
+ new_fds[i] = files->fd[i];
+ files->fd = new_fds;
+ files->max_fds = nfds;
+ } else {
+ /* Somebody expanded the array while we slept ... */
+ free_fd_array(new_fds, nfds);
+ }
+ error = 0;
+out:
+ return error;
+}
+
+void free_fd_array(struct file **array, int num)
+{
+ int size = num * sizeof(struct file *);
+ /* Same size test as alloc_fd_array(): page-sized arrays go to free_page(). */
+ if (size == PAGE_SIZE)
+ free_page((unsigned long) array);
+ else
+ kfree(array);
+}
+
static int copy_files(unsigned long clone_flags, struct task_struct * tsk)
{
struct files_struct *oldf, *newf;
struct file **old_fds, **new_fds;
- int size, i, error = 0;
+ int nfds, i, error = 0;

/*
* A background process may not have any files ...
@@ -402,24 +465,31 @@
if (!newf)
goto out;

- /*
- * Allocate the fd array, using get_free_page() if possible.
- * Eventually we want to make the array size variable ...
- */
- size = NR_OPEN * sizeof(struct file *);
- if (size == PAGE_SIZE)
- new_fds = (struct file **) __get_free_page(GFP_KERNEL);
- else
- new_fds = (struct file **) kmalloc(size, GFP_KERNEL);
- if (!new_fds)
- goto out_release;
- memset((void *) new_fds, 0, size);
-
newf->count = 1;
- newf->max_fds = NR_OPEN;
- newf->fd = new_fds;
newf->close_on_exec = oldf->close_on_exec;
i = copy_fdset(&newf->open_fds, &oldf->open_fds);
+#if 1
+ /* Do a sanity check ... */
+ if (i > oldf->max_fds)
+ printk("copy_files: pid %d, open files %d exceeds max %d!\n",
+ current->pid, i, oldf->max_fds);
+#endif
+
+ /*
+ * Check whether we need to allocate a larger fd array.
+ * Note: we're not a clone task, so the open count won't
+ * change.
+ */
+ new_fds = &newf->fd_array[0];
+ nfds = NR_OPEN_DEFAULT;
+ if (i > nfds) {
+ nfds = NR_OPEN;
+ new_fds = alloc_fd_array(nfds);
+ if (!new_fds)
+ goto out_release;
+ }
+ newf->max_fds = nfds;
+ newf->fd = new_fds;

old_fds = oldf->fd;
for (; i != 0; i--) {
--- linux-2.1.90/kernel/exit.c.old Sun Mar 22 11:30:40 1998
+++ linux-2.1.90/kernel/exit.c Tue Mar 24 18:36:24 1998
@@ -189,12 +189,10 @@
if (!--files->count) {
close_files(files);
/*
- * Free the fd array as appropriate ...
+ * Free the fd array if we expanded it.
*/
- if (NR_OPEN * sizeof(struct file *) == PAGE_SIZE)
- free_page((unsigned long) files->fd);
- else
- kfree(files->fd);
+ if (files->fd != &files->fd_array[0])
+ free_fd_array(files->fd, files->max_fds);
kmem_cache_free(files_cachep, files);
}
}
--- linux-2.1.90/fs/open.c.old Sun Mar 22 11:30:37 1998
+++ linux-2.1.90/fs/open.c Wed Mar 25 00:15:27 1998
@@ -688,6 +688,7 @@
struct files_struct * files = current->files;
int fd, error;

+repeat:
error = -EMFILE;
fd = find_first_zero_bit(&files->open_fds, NR_OPEN);
/*
@@ -696,8 +697,15 @@
*/
if (fd >= current->rlim[RLIMIT_NOFILE].rlim_cur)
goto out;
-
- /* Check here for fd > files->max_fds to do dynamic expansion */
+ /*
+ * Expand the fd array if fd is past its last slot (slots are 0..max_fds-1).
+ */
+ if (fd >= files->max_fds) {
+ error = expand_fd_array(files);
+ if (!error)
+ goto repeat;
+ goto out;
+ }

FD_SET(fd, &files->open_fds);
FD_CLR(fd, &files->close_on_exec);
--- linux-2.1.90/fs/fcntl.c.old Sun Mar 22 11:30:36 1998
+++ linux-2.1.90/fs/fcntl.c Wed Mar 25 00:10:29 1998
@@ -20,14 +20,15 @@

extern int sock_fcntl (struct file *, unsigned int cmd, unsigned long arg);

-static inline int dupfd(unsigned int fd, unsigned int arg)
+static inline int dupfd(unsigned int fd, unsigned int in_arg)
{
struct files_struct * files = current->files;
struct file * file;
+ unsigned int arg;
int error;

error = -EINVAL;
- if (arg >= NR_OPEN)
+ if (in_arg >= NR_OPEN)
goto out;

error = -EBADF;
@@ -35,10 +36,21 @@
if (!file)
goto out;

+repeat:
error = -EMFILE;
- arg = find_next_zero_bit(&files->open_fds, NR_OPEN, arg);
+ arg = find_next_zero_bit(&files->open_fds, NR_OPEN, in_arg);
if (arg >= current->rlim[RLIMIT_NOFILE].rlim_cur)
goto out_putf;
+ /*
+ * Expand the fd array if arg is past its last slot (slots are 0..max_fds-1).
+ */
+ if (arg >= files->max_fds) {
+ error = expand_fd_array(files);
+ if (!error)
+ goto repeat;
+ goto out_putf;
+ }
+
FD_SET(arg, &files->open_fds);
FD_CLR(arg, &files->close_on_exec);
fd_install(arg, file);
--- linux-2.1.90/fs/proc/array.c.old Sun Mar 22 11:30:37 1998
+++ linux-2.1.90/fs/proc/array.c Wed Mar 25 18:50:18 1998
@@ -656,11 +656,14 @@
"Pid:\t%d\n"
"PPid:\t%d\n"
"Uid:\t%d\t%d\t%d\t%d\n"
- "Gid:\t%d\t%d\t%d\t%d\n",
+ "Gid:\t%d\t%d\t%d\t%d\n"
+ "FDSize:\t%d\n",
get_task_state(p),
- p->pid, p->p_pptr->pid,
+ p->pid,
+ p->p_pptr->pid,
p->uid, p->euid, p->suid, p->fsuid,
- p->gid, p->egid, p->sgid, p->fsgid);
+ p->gid, p->egid, p->sgid, p->fsgid,
+ p->files ? p->files->max_fds : 0);
return buffer;
}

@@ -930,7 +933,8 @@
if (end > PGDIR_SIZE)
end = PGDIR_SIZE;
do {
- statm_pte_range(pmd, address, end - address, pages, shared, dirty, total);
+ statm_pte_range(pmd, address, end - address,
+ pages, shared, dirty, total);
address = (address + PMD_SIZE) & PMD_MASK;
pmd++;
} while (address < end);
@@ -940,7 +944,8 @@
int * pages, int * shared, int * dirty, int * total)
{
while (address < end) {
- statm_pmd_range(pgd, address, end - address, pages, shared, dirty, total);
+ statm_pmd_range(pgd, address, end - address,
+ pages, shared, dirty, total);
address = (address + PGDIR_SIZE) & PGDIR_MASK;
pgd++;
}
@@ -948,7 +953,7 @@

static int get_statm(int pid, char * buffer)
{
- struct task_struct *tsk = find_task_by_pid(pid);
+ struct task_struct *tsk;
int size=0, resident=0, share=0, trs=0, lrs=0, drs=0, dt=0;

read_lock(&tasklist_lock);
@@ -963,7 +968,8 @@
pgd_t *pgd = pgd_offset(tsk->mm, vma->vm_start);
int pages = 0, shared = 0, dirty = 0, total = 0;

- statm_pgd_range(pgd, vma->vm_start, vma->vm_end, &pages, &shared, &dirty, &total);
+ statm_pgd_range(pgd, vma->vm_start, vma->vm_end,
+ &pages, &shared, &dirty, &total);
resident += pages;
share += shared;
dt += dirty;

--------------759A7506A5A543E27F19D4A0--

-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@vger.rutgers.edu