Hi Linus,
This patch contains the following changes, basically a resend of the last
couple of days' patches that didn't make it into pre7-1.
a) SMP-safe handling of vmlist by vmalloc/vfree/ioremap.
b) changes to fs/proc/kcore.c and ipc/util.c to benefit from a)
c) exported blk_get_queue() (same patch as V Ganesh sent to you) - needed
for us and also probably (looking at Ingo's RAID stuff) for Linux
RAID.
d) removed typecasts like p = (char *)kmalloc(); they are not necessary as
kmalloc() returns void *.
e) make buffer_init() use (PAGE_SIZE << order) instead of
(1UL<<order)*PAGE_SIZE
f) make dcache_init()/inode_init() print the hashtable size in a manner
consistent with the standard (the standard being
buffer_init()/page_cache_init())
g) whitespace and comment changes to fs/super.c
h) refined file_systems_lock from a simple spinlock to a read/write
spinlock, as I think it better fits the usage of this particular
data structure (i.e. the file_systems list)
i) removed comments about "devfs crap" and made the corresponding
panic() message in mount_root() more instructive to the user.
j) removed unused sb variable from sys_umount()
I cc'd Al Viro as my patch touches fs/super.c and I suspect he is doing
some work on it?
Regards,
Tigran
diff -urN -X dontdiff linux/drivers/block/ll_rw_blk.c work/drivers/block/ll_rw_blk.c
--- linux/drivers/block/ll_rw_blk.c Thu Apr 27 09:01:29 2000
+++ work/drivers/block/ll_rw_blk.c Tue May 2 17:35:17 2000
@@ -1095,3 +1095,4 @@
EXPORT_SYMBOL(blk_queue_pluggable);
EXPORT_SYMBOL(blk_queue_make_request);
EXPORT_SYMBOL(generic_make_request);
+EXPORT_SYMBOL(blk_get_queue);
diff -urN -X dontdiff linux/drivers/char/mem.c work/drivers/char/mem.c
--- linux/drivers/char/mem.c Wed Apr 12 09:09:20 2000
+++ work/drivers/char/mem.c Tue May 2 16:42:51 2000
@@ -231,7 +231,8 @@
{
unsigned long p = *ppos;
ssize_t read = 0;
- ssize_t virtr;
+ ssize_t virtr = 0;
+ char * kbuf; /* k-addr because vread() takes vmlist_lock rwlock */
if (p < (unsigned long) high_memory) {
read = count;
@@ -258,11 +259,27 @@
count -= read;
}
- virtr = vread(buf, (char *)p, count);
- if (virtr < 0)
- return virtr;
- *ppos += p + virtr;
- return virtr + read;
+ kbuf = (char *)get_free_page(GFP_KERNEL);
+ if (!kbuf)
+ return -ENOMEM;
+ while (count > 0) {
+ int len = count;
+
+ if (len > PAGE_SIZE)
+ len = PAGE_SIZE;
+ len = vread(kbuf, (char *)p, len); /* always >= 0 */
+ if (len && copy_to_user(buf, kbuf, len)) {
+ free_page((unsigned long)kbuf);
+ return -EFAULT;
+ }
+ count -= len;
+ buf += len;
+ virtr += len;
+ p += len;
+ }
+ free_page((unsigned long)kbuf);
+ *ppos += p + virtr;
+ return virtr + read;
}
/*
diff -urN -X dontdiff linux/fs/binfmt_elf.c work/fs/binfmt_elf.c
--- linux/fs/binfmt_elf.c Thu Apr 27 09:01:30 2000
+++ work/fs/binfmt_elf.c Tue May 2 17:06:46 2000
@@ -237,7 +237,7 @@
size = sizeof(struct elf_phdr) * interp_elf_ex->e_phnum;
if (size > ELF_EXEC_PAGESIZE)
goto out;
- elf_phdata = (struct elf_phdr *) kmalloc(size, GFP_KERNEL);
+ elf_phdata = kmalloc(size, GFP_KERNEL);
if (!elf_phdata)
goto out;
@@ -421,7 +421,7 @@
size = elf_ex.e_phentsize * elf_ex.e_phnum;
if (size > 65536)
goto out;
- elf_phdata = (struct elf_phdr *) kmalloc(size, GFP_KERNEL);
+ elf_phdata = kmalloc(size, GFP_KERNEL);
if (!elf_phdata)
goto out;
@@ -456,8 +456,7 @@
*/
retval = -ENOMEM;
- elf_interpreter = (char *) kmalloc(elf_ppnt->p_filesz,
- GFP_KERNEL);
+ elf_interpreter = kmalloc(elf_ppnt->p_filesz, GFP_KERNEL);
if (!elf_interpreter)
goto out_free_file;
@@ -792,7 +791,7 @@
goto out;
error = -ENOMEM;
- elf_phdata = (struct elf_phdr *) kmalloc(j, GFP_KERNEL);
+ elf_phdata = kmalloc(j, GFP_KERNEL);
if (!elf_phdata)
goto out;
diff -urN -X dontdiff linux/fs/buffer.c work/fs/buffer.c
--- linux/fs/buffer.c Thu Apr 27 09:01:30 2000
+++ work/fs/buffer.c Tue May 2 16:44:42 2000
@@ -2277,7 +2277,7 @@
__get_free_pages(GFP_ATOMIC, order);
} while (hash_table == NULL && --order > 0);
printk("Buffer-cache hash table entries: %d (order: %d, %ld bytes)\n",
- nr_hash, order, (1UL<<order) * PAGE_SIZE);
+ nr_hash, order, (PAGE_SIZE << order));
if (!hash_table)
panic("Failed to allocate buffer hash table\n");
diff -urN -X dontdiff linux/fs/dcache.c work/fs/dcache.c
--- linux/fs/dcache.c Sat Apr 29 16:06:34 2000
+++ work/fs/dcache.c Tue May 2 16:44:42 2000
@@ -1098,10 +1098,11 @@
__get_free_pages(GFP_ATOMIC, order);
} while (dentry_hashtable == NULL && --order >= 0);
+ printk("VFS: dcache hash table entries: %d (order: %ld, %ld bytes)\n",
+ nr_hash, order, (PAGE_SIZE << order));
+
if (!dentry_hashtable)
panic("Failed to allocate dcache hash table\n");
-
- printk("VFS: DCACHE hash table configured to %d entries\n", nr_hash);
d = dentry_hashtable;
i = nr_hash;
diff -urN -X dontdiff linux/fs/inode.c work/fs/inode.c
--- linux/fs/inode.c Thu Apr 27 09:01:30 2000
+++ work/fs/inode.c Tue May 2 16:44:42 2000
@@ -876,10 +876,11 @@
__get_free_pages(GFP_ATOMIC, order);
} while (inode_hashtable == NULL && --order >= 0);
+ printk("VFS: inode hash table entries: %d (order: %ld, %ld bytes)\n",
+ nr_hash, order, (PAGE_SIZE << order));
+
if (!inode_hashtable)
panic("Failed to allocate inode hash table\n");
-
- printk("VFS: INODE hash table configured to %d entries\n", nr_hash);
head = inode_hashtable;
i = nr_hash;
diff -urN -X dontdiff linux/fs/proc/kcore.c work/fs/proc/kcore.c
--- linux/fs/proc/kcore.c Sun Feb 27 04:33:07 2000
+++ work/fs/proc/kcore.c Tue May 2 16:42:51 2000
@@ -315,13 +315,12 @@
size_t elf_buflen;
int num_vma;
- /* XXX we need to somehow lock vmlist between here
- * and after elf_kcore_store_hdr() returns.
- * For now assume that num_vma does not change (TA)
- */
+ read_lock(&vmlist_lock);
proc_root_kcore->size = size = get_kcore_size(&num_vma, &elf_buflen);
- if (buflen == 0 || *fpos >= size)
+ if (buflen == 0 || *fpos >= size) {
+ read_unlock(&vmlist_lock);
return 0;
+ }
/* trim buflen to not go beyond EOF */
if (buflen > size - *fpos)
@@ -335,10 +334,13 @@
if (buflen < tsz)
tsz = buflen;
elf_buf = kmalloc(elf_buflen, GFP_ATOMIC);
- if (!elf_buf)
+ if (!elf_buf) {
+ read_unlock(&vmlist_lock);
return -ENOMEM;
+ }
memset(elf_buf, 0, elf_buflen);
elf_kcore_store_hdr(elf_buf, num_vma, elf_buflen);
+ read_unlock(&vmlist_lock);
if (copy_to_user(buffer, elf_buf + *fpos, tsz)) {
kfree(elf_buf);
return -EFAULT;
@@ -352,7 +354,8 @@
/* leave now if filled buffer already */
if (buflen == 0)
return acc;
- }
+ } else
+ read_unlock(&vmlist_lock);
/* where page 0 not mapped, write zeros into buffer */
#if defined (__i386__) || defined (__mc68000__)
diff -urN -X dontdiff linux/fs/super.c work/fs/super.c
--- linux/fs/super.c Sat Apr 29 16:06:34 2000
+++ work/fs/super.c Tue May 2 17:09:29 2000
@@ -3,10 +3,12 @@
*
* Copyright (C) 1991, 1992 Linus Torvalds
*
- * super.c contains code to handle: - mount structures
+ * super.c contains code to handle: - filesystem drivers list
+ * - mount structures
* - super-block tables.
* - mount system call
* - umount system call
+ * - ustat system call
*
* Added options to /proc/mounts
* Torbjörn Lindh (torbjorn.lindh@gopta.se), April 14, 1996.
@@ -74,7 +76,7 @@
*/
static struct file_system_type *file_systems = NULL;
-static spinlock_t file_systems_lock = SPIN_LOCK_UNLOCKED;
+static rwlock_t file_systems_lock = RW_LOCK_UNLOCKED;
/* WARNING: This can be used only if we _already_ own a reference */
static void get_filesystem(struct file_system_type *fs)
@@ -120,13 +122,13 @@
return -EINVAL;
if (fs->next)
return -EBUSY;
- spin_lock(&file_systems_lock);
+ write_lock(&file_systems_lock);
p = find_filesystem(fs->name);
if (*p)
res = -EBUSY;
else
*p = fs;
- spin_unlock(&file_systems_lock);
+ write_unlock(&file_systems_lock);
return res;
}
@@ -146,18 +148,18 @@
{
struct file_system_type ** tmp;
- spin_lock(&file_systems_lock);
+ write_lock(&file_systems_lock);
tmp = &file_systems;
while (*tmp) {
if (fs == *tmp) {
*tmp = fs->next;
fs->next = NULL;
- spin_unlock(&file_systems_lock);
+ write_unlock(&file_systems_lock);
return 0;
}
tmp = &(*tmp)->next;
}
- spin_unlock(&file_systems_lock);
+ write_unlock(&file_systems_lock);
return -EINVAL;
}
@@ -173,14 +175,14 @@
return err;
err = -EINVAL;
- spin_lock(&file_systems_lock);
+ read_lock(&file_systems_lock);
for (tmp=file_systems, index=0 ; tmp ; tmp=tmp->next, index++) {
if (strcmp(tmp->name,name) == 0) {
err = index;
break;
}
}
- spin_unlock(&file_systems_lock);
+ read_unlock(&file_systems_lock);
putname(name);
return err;
}
@@ -190,11 +192,11 @@
struct file_system_type * tmp;
int len, res;
- spin_lock(&file_systems_lock);
+ read_lock(&file_systems_lock);
for (tmp = file_systems; tmp; tmp = tmp->next, index--)
if (index <= 0 && try_inc_mod_count(tmp->owner))
break;
- spin_unlock(&file_systems_lock);
+ read_unlock(&file_systems_lock);
if (!tmp)
return -EINVAL;
@@ -210,10 +212,10 @@
struct file_system_type * tmp;
int index;
- spin_lock(&file_systems_lock);
+ read_lock(&file_systems_lock);
for (tmp = file_systems, index = 0 ; tmp ; tmp = tmp->next, index++)
;
- spin_unlock(&file_systems_lock);
+ read_unlock(&file_systems_lock);
return index;
}
@@ -245,7 +247,7 @@
int len = 0;
struct file_system_type * tmp;
- spin_lock(&file_systems_lock);
+ read_lock(&file_systems_lock);
tmp = file_systems;
while (tmp && len < PAGE_SIZE - 80) {
len += sprintf(buf+len, "%s\t%s\n",
@@ -253,7 +255,7 @@
tmp->name);
tmp = tmp->next;
}
- spin_unlock(&file_systems_lock);
+ read_unlock(&file_systems_lock);
return len;
}
@@ -261,17 +263,17 @@
{
struct file_system_type *fs;
- spin_lock(&file_systems_lock);
+ read_lock(&file_systems_lock);
fs = *(find_filesystem(name));
if (fs && !try_inc_mod_count(fs->owner))
fs = NULL;
- spin_unlock(&file_systems_lock);
+ read_unlock(&file_systems_lock);
if (!fs && (request_module(name) == 0)) {
- spin_lock(&file_systems_lock);
+ read_lock(&file_systems_lock);
fs = *(find_filesystem(name));
if (fs && !try_inc_mod_count(fs->owner))
fs = NULL;
- spin_unlock(&file_systems_lock);
+ read_unlock(&file_systems_lock);
}
return fs;
}
@@ -288,7 +290,7 @@
struct vfsmount *mnt;
char *name;
- mnt = (struct vfsmount *)kmalloc(sizeof(struct vfsmount), GFP_KERNEL);
+ mnt = kmalloc(sizeof(struct vfsmount), GFP_KERNEL);
if (!mnt)
goto out;
memset(mnt, 0, sizeof(struct vfsmount));
@@ -302,14 +304,14 @@
/* N.B. Is it really OK to have a vfsmount without names? */
if (dev_name) {
- name = (char *) kmalloc(strlen(dev_name)+1, GFP_KERNEL);
+ name = kmalloc(strlen(dev_name)+1, GFP_KERNEL);
if (name) {
strcpy(name, dev_name);
mnt->mnt_devname = name;
}
}
if (dir_name) {
- name = (char *) kmalloc(strlen(dir_name)+1, GFP_KERNEL);
+ name = kmalloc(strlen(dir_name)+1, GFP_KERNEL);
if (name) {
strcpy(name, dir_name);
mnt->mnt_dirname = name;
@@ -336,12 +338,12 @@
char *new_devname = NULL, *new_dirname = NULL;
if (dev_name) {
- new_devname = (char *) kmalloc(strlen(dev_name)+1, GFP_KERNEL);
+ new_devname = kmalloc(strlen(dev_name)+1, GFP_KERNEL);
if (new_devname)
strcpy(new_devname, dev_name);
}
if (dir_name) {
- new_dirname = (char *) kmalloc(strlen(dir_name)+1, GFP_KERNEL);
+ new_dirname = kmalloc(strlen(dir_name)+1, GFP_KERNEL);
if (new_dirname)
strcpy(new_dirname, dir_name);
}
@@ -796,6 +798,7 @@
{
struct block_device *bdev;
kdev_t dev;
+
dput(sb->s_root);
sb->s_root = NULL;
lock_super(sb);
@@ -807,10 +810,9 @@
}
/* Forget any remaining inodes */
- if (invalidate_inodes(sb)) {
+ if (invalidate_inodes(sb))
printk("VFS: Busy inodes after unmount. "
"Self-destruct in 5 seconds. Have a nice day...\n");
- }
dev = sb->s_dev;
sb->s_dev = 0; /* Free the superblock */
@@ -993,7 +995,6 @@
struct nameidata nd;
char *kname;
int retval;
- struct super_block *sb;
if (!capable(CAP_SYS_ADMIN))
return -EPERM;
@@ -1009,7 +1010,6 @@
putname(kname);
if (retval)
goto out;
- sb = nd.dentry->d_inode->i_sb;
retval = -EINVAL;
if (nd.dentry!=nd.mnt->mnt_root)
goto dput_and_out;
@@ -1330,12 +1330,8 @@
ROOT_DEV = MKDEV (major, minor);
}
- /*
- * Probably pure paranoia, but I'm less than happy about delving into
- * devfs crap and checking it right now. Later.
- */
if (!ROOT_DEV)
- panic("I have no root and I want to scream");
+ panic ("Please append a correct \"root=\" boot option");
bdev = bdget(kdev_t_to_nr(ROOT_DEV));
if (!bdev)
@@ -1369,22 +1365,21 @@
goto mount_it;
}
- spin_lock(&file_systems_lock);
+ read_lock(&file_systems_lock);
for (fs_type = file_systems ; fs_type ; fs_type = fs_type->next) {
if (!(fs_type->fs_flags & FS_REQUIRES_DEV))
continue;
if (!try_inc_mod_count(fs_type->owner))
continue;
- spin_unlock(&file_systems_lock);
+ read_unlock(&file_systems_lock);
sb = read_super(ROOT_DEV,bdev,fs_type,root_mountflags,NULL,1);
if (sb)
goto mount_it;
- spin_lock(&file_systems_lock);
+ read_lock(&file_systems_lock);
put_filesystem(fs_type);
}
- spin_unlock(&file_systems_lock);
- panic("VFS: Unable to mount root fs on %s",
- kdevname(ROOT_DEV));
+ read_unlock(&file_systems_lock);
+ panic("VFS: Unable to mount root fs on %s", kdevname(ROOT_DEV));
mount_it:
printk ("VFS: Mounted root (%s filesystem)%s.\n",
diff -urN -X dontdiff linux/include/linux/vmalloc.h work/include/linux/vmalloc.h
--- linux/include/linux/vmalloc.h Sat Mar 18 20:11:01 2000
+++ work/include/linux/vmalloc.h Tue May 2 17:07:17 2000
@@ -3,6 +3,7 @@
#include <linux/sched.h>
#include <linux/mm.h>
+#include <linux/spinlock.h>
#include <asm/pgtable.h>
@@ -24,6 +25,11 @@
void vmfree_area_pages(unsigned long address, unsigned long size);
int vmalloc_area_pages(unsigned long address, unsigned long size);
+/* vmlist_lock is a read-write spinlock that protects vmlist
+ * Used in mm/vmalloc.c (get_vm_area() and vfree()) and fs/proc/kcore.c.
+ */
+extern rwlock_t vmlist_lock;
+
extern struct vm_struct * vmlist;
#endif
diff -urN -X dontdiff linux/ipc/util.c work/ipc/util.c
--- linux/ipc/util.c Wed Mar 8 17:16:24 2000
+++ work/ipc/util.c Tue May 2 16:42:51 2000
@@ -159,25 +159,19 @@
void* ipc_alloc(int size)
{
void* out;
- if(size > PAGE_SIZE) {
- lock_kernel();
+ if(size > PAGE_SIZE)
out = vmalloc(size);
- unlock_kernel();
- } else {
+ else
out = kmalloc(size, GFP_KERNEL);
- }
return out;
}
void ipc_free(void* ptr, int size)
{
- if(size > PAGE_SIZE) {
- lock_kernel();
+ if(size > PAGE_SIZE)
vfree(ptr);
- unlock_kernel();
- } else {
+ else
kfree(ptr);
- }
}
/*
diff -urN -X dontdiff linux/mm/vmalloc.c work/mm/vmalloc.c
--- linux/mm/vmalloc.c Fri Mar 24 11:01:34 2000
+++ work/mm/vmalloc.c Tue May 2 16:42:51 2000
@@ -3,14 +3,17 @@
*
* Copyright (C) 1993 Linus Torvalds
* Support of BIGMEM added by Gerhard Wichert, Siemens AG, July 1999
+ * SMP-threaded vmalloc/vfree/ioremap, Tigran Aivazian <tigran@veritas.com>, May 2000
*/
#include <linux/malloc.h>
#include <linux/vmalloc.h>
+#include <linux/spinlock.h>
#include <asm/uaccess.h>
#include <asm/pgalloc.h>
+rwlock_t vmlist_lock = RW_LOCK_UNLOCKED;
struct vm_struct * vmlist = NULL;
static inline void free_area_pte(pmd_t * pmd, unsigned long address, unsigned long size)
@@ -163,11 +166,13 @@
if (!area)
return NULL;
addr = VMALLOC_START;
+ write_lock(&vmlist_lock);
for (p = &vmlist; (tmp = *p) ; p = &tmp->next) {
if (size + addr < (unsigned long) tmp->addr)
break;
addr = tmp->size + (unsigned long) tmp->addr;
if (addr > VMALLOC_END-size) {
+ write_unlock(&vmlist_lock);
kfree(area);
return NULL;
}
@@ -177,6 +182,7 @@
area->size = size + PAGE_SIZE;
area->next = *p;
*p = area;
+ write_unlock(&vmlist_lock);
return area;
}
@@ -190,14 +196,17 @@
printk(KERN_ERR "Trying to vfree() bad address (%p)\n", addr);
return;
}
+ write_lock(&vmlist_lock);
for (p = &vmlist ; (tmp = *p) ; p = &tmp->next) {
if (tmp->addr == addr) {
*p = tmp->next;
vmfree_area_pages(VMALLOC_VMADDR(tmp->addr), tmp->size);
kfree(tmp);
+ write_unlock(&vmlist_lock);
return;
}
}
+ write_unlock(&vmlist_lock);
printk(KERN_ERR "Trying to vfree() nonexistent vm area (%p)\n", addr);
}
@@ -235,6 +244,7 @@
if ((unsigned long) addr + count < count)
count = -(unsigned long) addr;
+ read_lock(&vmlist_lock);
for (tmp = vmlist; tmp; tmp = tmp->next) {
vaddr = (char *) tmp->addr;
if (addr >= vaddr + tmp->size - PAGE_SIZE)
@@ -242,7 +252,7 @@
while (addr < vaddr) {
if (count == 0)
goto finished;
- put_user('\0', buf);
+ *buf = '\0';
buf++;
addr++;
count--;
@@ -251,12 +261,13 @@
do {
if (count == 0)
goto finished;
- put_user(*addr, buf);
+ *buf = *addr;
buf++;
addr++;
count--;
} while (--n > 0);
}
finished:
+ read_unlock(&vmlist_lock);
return buf - buf_start;
}
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@vger.rutgers.edu
Please read the FAQ at http://www.tux.org/lkml/
This archive was generated by hypermail 2b29 : Sun May 07 2000 - 21:00:10 EST