semaphore not held for f_op->write

Peter Moulder (reiter@netspace.net.au)
10 Mar 1998 13:14:49 +1100


I notice that we always down(&inode->i_sem) around calls to truncate
(except as new inode) and write.

However, it's recently been brought to my attention (because I put a
`assert (atomic_read(&inode->i_sem.count) < 1)' in ext2_file_write())
that no semaphore is held in the following situations:

i) core dumps

ii) writing to BSD process accounting logs

iii) writing to the quota file

iv) possibly some other places I haven't looked at (architecture- or
fs-dependent; and drivers/block/rd.c, where the writes are to the
ram device)

Is this a bug? Would a patch be accepted if it downed the semaphore
in the first 3 cases above? Would it be a bug to _add_ a semaphore
downing in the first 3 cases above?

Such a patch (against 2.1.89) appears below. Btw, it would be good if
someone familiar with quotas could confirm that the below can't cause
any deadlock problems. Does the kernel provide any proper means of
acquiring several locks at once (e.g. releasing all until all can be
acquired)?

The only other possible change I see would be in the binfmt_* files,
to move the `down(...);' line up 3 lines and make the `up(...);'
statement not conditional on has_dumped. Just an aesthetic /
maintainability judgement.

pjm.

diff -urN linux-2.1.89-std/fs/binfmt_aout.c linux-2.1.89-045/fs/binfmt_aout.c
--- linux-2.1.89-std/fs/binfmt_aout.c Mon Dec 8 06:02:13 1997
+++ linux-2.1.89-045/fs/binfmt_aout.c Mon Mar 9 21:13:38 1998
@@ -132,6 +132,7 @@
if (!file.f_op->write)
goto close_coredump;
has_dumped = 1;
+ down(&inode->i_sem);
current->flags |= PF_DUMPCORE;
strncpy(dump.u_comm, current->comm, sizeof(current->comm));
#ifndef __sparc__
@@ -212,6 +213,8 @@
close_coredump:
if (file.f_op->release)
file.f_op->release(inode,&file);
+ if (has_dumped)
+ up(&inode->i_sem);
put_write_access(inode);
end_coredump:
set_fs(fs);
diff -urN linux-2.1.89-std/fs/binfmt_elf.c linux-2.1.89-045/fs/binfmt_elf.c
--- linux-2.1.89-std/fs/binfmt_elf.c Mon Mar 9 21:08:47 1998
+++ linux-2.1.89-045/fs/binfmt_elf.c Mon Mar 9 21:13:38 1998
@@ -1131,6 +1131,7 @@
if (!file.f_op->write)
goto close_coredump;
has_dumped = 1;
+ down(&inode->i_sem);
current->flags |= PF_DUMPCORE;

DUMP_WRITE(&elf, sizeof(elf));
@@ -1315,6 +1316,8 @@
close_coredump:
if (file.f_op->release)
file.f_op->release(inode,&file);
+ if (has_dumped)
+ up(&inode->i_sem);

end_coredump:
set_fs(fs);
diff -urN linux-2.1.89-std/fs/dquot.c linux-2.1.89-045/fs/dquot.c
--- linux-2.1.89-std/fs/dquot.c Mon Mar 9 21:08:49 1998
+++ linux-2.1.89-045/fs/dquot.c Mon Mar 9 21:13:38 1998
@@ -228,6 +228,7 @@
{
short type = dquot->dq_type;
struct file *filp = dquot->dq_mnt->mnt_quotas[type];
+ struct inode *inode = filp->f_dentry->d_inode;
mm_segment_t fs;
loff_t offset;

@@ -235,13 +236,15 @@
return;
lock_dquot(dquot);
down(&dquot->dq_mnt->mnt_sem);
+ down(&inode->i_sem);
offset = dqoff(dquot->dq_id);
fs = get_fs();
set_fs(KERNEL_DS);
-
+
if (filp->f_op->write(filp, (char *)&dquot->dq_dqb, sizeof(struct dqblk), &offset) == sizeof(struct dqblk))
dquot->dq_flags &= ~DQ_MOD;

+ up(&inode->i_sem);
up(&dquot->dq_mnt->mnt_sem);
set_fs(fs);
unlock_dquot(dquot);
diff -urN linux-2.1.89-std/kernel/acct.c linux-2.1.89-045/kernel/acct.c
--- linux-2.1.89-std/kernel/acct.c Mon Mar 9 21:09:02 1998
+++ linux-2.1.89-045/kernel/acct.c Mon Mar 9 21:13:46 1998
@@ -254,6 +254,7 @@
struct acct ac;
mm_segment_t fs;
unsigned long vsize;
+ struct inode *inode;

/*
* First check to see if there is enough free_space to continue the process
@@ -315,8 +316,11 @@
*/
fs = get_fs();
set_fs(KERNEL_DS);
+ inode = acct_file->f_dentry->d_inode;
+ down(&inode->i_sem);
acct_file->f_op->write(acct_file, (char *)&ac,
sizeof(struct acct), &acct_file->f_pos);
+ up(&inode->i_sem);
set_fs(fs);
return 0;
}

-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@vger.rutgers.edu