[FIX] Quotas

Jan Kara (jack@atrey.karlin.mff.cuni.cz)
Thu, 17 Jun 1999 17:13:12 +0200


--XVdCdHGoSgWSkgj5
Content-Type: text/plain; charset=us-ascii

Hello.

Here are my fixes for quotas for 2.2.10. Changes against 2.2.9 are:

* Added locking when walking through inode lists (inode_lock).
* Check for bogus uid/gid in quotactl -- ie. id > 65535
* Fixed check for group in quota

Fixes for 2.2.9 were:

* Added check to dqget() so it now can't return invalidated quota (could happen when quota
was invalidated while we were sleeping).
* Fixed races in dquot_transfer -- quota now locked before testing and unlocked after
updating so it can't change in the mean time.
* Same for dquot_(free/alloc)_(block/inode).
* Fixed race in DQUOT_TRANSFER - we could move quota from user A to user B, then find out
that notify_change() can't be done so we want to move quota back but we can't -- ie.
quota is reached and we leave quota to user B. Might be even exploitable when we think
about groups.
* invalidate_dquot() writes modified quotas -- when invalidate is in process somebody
might hold quota too (some operation like get_quota) so it needn't to be written by dqput.
* sync_dquot did only one pass (on the second one the pointer wasn't initialized). Fixed.
* sync_dquot could call write_dquot on invalidated dquot. Fixed.
* When there was no memory for new dquot but we hadn't reached MAX_DQUOTS, we were trying to
allocate some memory in tight loop forever...
* grow_dquots() now initializes wait_queue correctly
* add_dquot_ref() is restarting after each blocking operation (filp list might change)
* reset_dquot_ptrs() was going just through filps so we could easily forget some quota in i-cache
Now reset_dquot_ptrs() is going through inode lists.
* dqput() didn't check whether quota isn't invalidated after blocking which could result in NULL dereference.
* write_dquot() might need to update written quota (when we were allocating next block for our
file) which resulted in deadlock (I can't understand how there could exist 2 people seeing it :-)).
* (read/write)_dquot didn't check, whether quota wasn't invalidated while they were sleeping. Ooops.
* There could be races between concurrent quota_ons and quota_offs (quite unprobable but theoretically possible).
Added new dqoff_sem semaphore serializing quota_ons and quota_offs.
* Some other minor cleanup...

Patches can be downloaded at ftp://atrey.karlin.mff.cuni.cz/pub/local/jack/. quota-fix-2.2.10-1.diff.gz is patch
against clean 2.2.10, quota-fix-2.2.10-1a.diff is against kernel with my previous patches. Patch against
clean kernel is also attached.

Linus, please, apply my patch (or at least mail me, what you dislike... :)).

Honza.

--XVdCdHGoSgWSkgj5
Content-Type: text/plain; charset=us-ascii
Content-Disposition: attachment; filename="quota-fix-2.2.10-1.diff"

--- linux/fs/dquot.c Wed Jun 16 21:54:33 1999
+++ linux/fs/dquot.c Wed Jun 16 22:39:30 1999
@@ -21,6 +21,17 @@
* Revised list management to avoid races
* -- Bill Hawes, <whawes@star.net>, 9/98
*
+ * Fixed races in dquot_transfer(), dqget() and dquot_alloc_...().
+ * As the consequence the locking was moved from dquot_decr_...(),
+ * dquot_incr_...() to calling functions.
+ * invalidate_dquots() now writes modified dquots.
+ * Serialized quota_off() and quota_on() for mount point.
+ * Fixed a few bugs in grow_dquots. Fixed deadlock in write_dquot().
+ * reset_dquot_ptrs() now traverse through inodes not filps.
+ * add_dquot_ref() restarts after blocking
+ * Added check for bogus uid and fixed check for group in quotactl.
+ * Jan Kara, <jack@atrey.karlin.mff.cuni.cz>, 4-6/99
+ *
* (C) Copyright 1994 - 1997 Marco van Wieringen
*/

@@ -48,6 +59,10 @@
int nr_dquots = 0, nr_free_dquots = 0;
int max_dquots = NR_DQUOTS;

+/* We need this list for invalidating dquots... */
+extern struct list_head inode_in_use;
+extern spinlock_t inode_lock;
+
static char quotamessage[MAX_QUOTA_MESSAGE];
static char *quotatypes[] = INITQFNAMES;

@@ -166,7 +181,8 @@
{
/* sanity check */
if (list_empty(&dquot->dq_free)) {
- printk("remove_free_dquot: dquot not on free list??\n");
+ printk("remove_free_dquot: dquot not on the free list??\n");
+ return; /* J.K. Just don't do anything */
}
list_del(&dquot->dq_free);
INIT_LIST_HEAD(&dquot->dq_free);
@@ -229,13 +245,25 @@
static void write_dquot(struct dquot *dquot)
{
short type = dquot->dq_type;
- struct file *filp = dquot->dq_mnt->mnt_dquot.files[type];
+ struct file *filp;
mm_segment_t fs;
loff_t offset;
ssize_t ret;
+ struct dqblk data;
+ struct semaphore *sem = &dquot->dq_mnt->mnt_dquot.dqio_sem;

- lock_dquot(dquot);
- down(&dquot->dq_mnt->mnt_dquot.semaphore);
+ /*
+ * We copy our data to preserve consistency when dquot is not locked.
+ * We can't lock it because it can cause deadlocks - think about
+ * growing the quota file...
+ */
+ memcpy(&data, &dquot->dq_dqb, sizeof(struct dqblk));
+ down(sem);
+ if (!dquot->dq_mnt) { /* Invalidated quota? */
+ up(sem);
+ return;
+ }
+ filp = dquot->dq_mnt->mnt_dquot.files[type];
offset = dqoff(dquot->dq_id);
fs = get_fs();
set_fs(KERNEL_DS);
@@ -247,46 +275,51 @@
dquot->dq_flags &= ~DQ_MOD;
ret = 0;
if (filp)
- ret = filp->f_op->write(filp, (char *)&dquot->dq_dqb,
+ ret = filp->f_op->write(filp, (char *)&data,
sizeof(struct dqblk), &offset);
if (ret != sizeof(struct dqblk))
printk(KERN_WARNING "VFS: dquota write failed on dev %s\n",
kdevname(dquot->dq_dev));

- up(&dquot->dq_mnt->mnt_dquot.semaphore);
+ /*
+ * Be sure that anybody didn't invalidated the dquot in the mean time...
+ * Nasty but I don't see other choice when we don't want to lock dquot.
+ */
+ up(sem);
set_fs(fs);

- unlock_dquot(dquot);
dqstats.writes++;
}

static void read_dquot(struct dquot *dquot)
{
- short type;
+ short type = dquot->dq_type;
struct file *filp;
mm_segment_t fs;
loff_t offset;

- type = dquot->dq_type;
filp = dquot->dq_mnt->mnt_dquot.files[type];
-
if (filp == (struct file *)NULL)
return;

lock_dquot(dquot);
- down(&dquot->dq_mnt->mnt_dquot.semaphore);
+ if (!dquot->dq_mnt) /* Invalidated quota? */
+ goto out_lock;
+ /* Now we are sure filp is valid - the dquot isn't invalidated */
+ down(&dquot->dq_mnt->mnt_dquot.dqio_sem);
offset = dqoff(dquot->dq_id);
fs = get_fs();
set_fs(KERNEL_DS);
filp->f_op->read(filp, (char *)&dquot->dq_dqb, sizeof(struct dqblk), &offset);
- up(&dquot->dq_mnt->mnt_dquot.semaphore);
+ up(&dquot->dq_mnt->mnt_dquot.dqio_sem);
set_fs(fs);

if (dquot->dq_bhardlimit == 0 && dquot->dq_bsoftlimit == 0 &&
dquot->dq_ihardlimit == 0 && dquot->dq_isoftlimit == 0)
dquot->dq_flags |= DQ_FAKE;
- unlock_dquot(dquot);
dqstats.reads++;
+out_lock:
+ unlock_dquot(dquot);
}

/*
@@ -306,10 +339,11 @@

void invalidate_dquots(kdev_t dev, short type)
{
- struct dquot *dquot, *next = inuse_list;
+ struct dquot *dquot, *next;
int need_restart;

restart:
+ next = inuse_list; /* Here it is better. Otherwise the restart doesn't have any sense ;-) */
need_restart = 0;
while ((dquot = next) != NULL) {
next = dquot->dq_next;
@@ -317,6 +351,8 @@
continue;
if (dquot->dq_type != type)
continue;
+ if (!dquot->dq_mnt) /* Already invalidated entry? */
+ continue;
if (dquot->dq_flags & DQ_LOCKED) {
__wait_on_dquot(dquot);

@@ -329,6 +365,18 @@
continue;
if (dquot->dq_type != type)
continue;
+ if (!dquot->dq_mnt)
+ continue;
+ }
+ /*
+ * Because inodes needn't to be the only holders of dquot
+ * the quota needn't to be written to disk. So we write it
+ * ourselves before discarding the data just for sure...
+ */
+ if (dquot->dq_flags & DQ_MOD && dquot->dq_mnt)
+ {
+ write_dquot(dquot);
+ need_restart = 1; /* We slept on IO */
}
clear_dquot(dquot);
}
@@ -342,10 +390,11 @@

int sync_dquots(kdev_t dev, short type)
{
- struct dquot *dquot, *next = inuse_list;
+ struct dquot *dquot, *next;
int need_restart;

restart:
+ next = inuse_list;
need_restart = 0;
while ((dquot = next) != NULL) {
next = dquot->dq_next;
@@ -353,6 +402,8 @@
continue;
if (type != -1 && dquot->dq_type != type)
continue;
+ if (!dquot->dq_mnt) /* Invalidated? */
+ continue;
if (!(dquot->dq_flags & (DQ_LOCKED | DQ_MOD)))
continue;

@@ -390,10 +441,11 @@
* checking and doesn't need to be written. It's just an empty
* dquot that is put back on to the freelist.
*/
- if (dquot->dq_mnt != (struct vfsmount *)NULL) {
+ if (dquot->dq_mnt)
dqstats.drops++;
we_slept:
- wait_on_dquot(dquot);
+ wait_on_dquot(dquot);
+ if (dquot->dq_mnt) {
if (dquot->dq_count > 1) {
dquot->dq_count--;
return;
@@ -407,8 +459,16 @@
/* sanity check */
if (!list_empty(&dquot->dq_free)) {
printk("dqput: dquot already on free list??\n");
+ dquot->dq_count--; /* J.K. Just decrementing use count seems safer... */
+ return;
}
if (--dquot->dq_count == 0) {
+ /* Sanity check. Locked quota without owner isn't good idea... */
+ if (dquot->dq_flags & DQ_LOCKED) {
+ printk(KERN_ERR "VFS: Locked quota to be put on the free list.\n");
+ dquot->dq_flags &= ~DQ_LOCKED;
+ }
+ dquot->dq_flags &= ~DQ_MOD; /* Modified flag has no sense on free list */
/* Place at end of LRU free queue */
put_dquot_last(dquot);
wake_up(&dquot_wait);
@@ -417,23 +477,25 @@
return;
}

-static void grow_dquots(void)
+static int grow_dquots(void)
{
struct dquot *dquot;
- int cnt = 32;
+ int cnt = 0;

- while (cnt > 0) {
+ while (cnt < 32) {
dquot = kmem_cache_alloc(dquot_cachep, SLAB_KERNEL);
if(!dquot)
- return;
+ return cnt;

nr_dquots++;
memset((caddr_t)dquot, 0, sizeof(struct dquot));
+ init_waitqueue(&dquot->dq_wait);
/* all dquots go on the inuse_list */
put_inuse(dquot);
put_dquot_head(dquot);
- cnt--;
+ cnt++;
}
+ return cnt;
}

static struct dquot *find_best_candidate_weighted(void)
@@ -445,6 +507,7 @@

while ((tmp = tmp->next) != &free_dquots && --limit) {
dquot = list_entry(tmp, struct dquot, dq_free);
+ /* This should never happen... */
if (dquot->dq_flags & (DQ_LOCKED | DQ_MOD))
continue;
myscore = dquot->dq_referenced;
@@ -480,20 +543,9 @@
if (!dquot)
goto pressure;
got_it:
- if (dquot->dq_flags & (DQ_LOCKED | DQ_MOD)) {
- wait_on_dquot(dquot);
- if (dquot->dq_flags & DQ_MOD)
- {
- if(dquot->dq_mnt != (struct vfsmount *)NULL)
- write_dquot(dquot);
- }
- /*
- * The dquot may be back in use now, so we
- * must recheck the free list.
- */
- goto repeat;
- }
- /* sanity check ... */
+ /* Sanity checks */
+ if (dquot->dq_flags & DQ_LOCKED)
+ printk(KERN_ERR "VFS: Locked dquot on the free list\n");
if (dquot->dq_count != 0)
printk(KERN_ERR "VFS: free dquot count=%d\n", dquot->dq_count);

@@ -504,10 +556,9 @@
return dquot;

pressure:
- if (nr_dquots < max_dquots) {
- grow_dquots();
- goto repeat;
- }
+ if (nr_dquots < max_dquots)
+ if (grow_dquots())
+ goto repeat;

dquot = find_best_candidate_weighted();
if (dquot)
@@ -534,7 +585,7 @@
struct dquot *dquot, *empty = NULL;
struct vfsmount *vfsmnt;

- if ((vfsmnt = lookup_vfsmnt(dev)) == (struct vfsmount *)NULL || is_enabled(vfsmnt, type) == 0)
+ if ((vfsmnt = lookup_vfsmnt(dev)) == (struct vfsmount *)NULL || !is_enabled(vfsmnt, type))
return(NODQUOT);

we_slept:
@@ -567,12 +618,46 @@
while (dquot_updating[hashent])
sleep_on(&update_wait);

+ if (!dquot->dq_mnt) { /* Has somebody invalidated entry under us? */
+ /*
+ * Do it as if the quota was invalidated before we started
+ */
+ dqput(dquot);
+ return NODQUOT;
+ }
dquot->dq_referenced++;
dqstats.lookups++;

return dquot;
}

+static struct dquot *dqduplicate(struct dquot *dquot)
+{
+ if (dquot == NODQUOT || !dquot->dq_mnt)
+ return NODQUOT;
+ dquot->dq_count++;
+ wait_on_dquot(dquot);
+ if (!dquot->dq_mnt) {
+ dquot->dq_count--;
+ return NODQUOT;
+ }
+ return dquot;
+}
+
+static int dqinit_needed(struct inode *inode, short type)
+{
+ int cnt;
+
+ if (!(S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode) || S_ISLNK(inode->i_mode)))
+ return 0;
+ if (type != -1)
+ return inode->i_dquot[type] == NODQUOT;
+ for (cnt = 0; cnt < MAXQUOTAS; cnt++)
+ if (inode->i_dquot[cnt] == NODQUOT)
+ return 1;
+ return 0;
+}
+
static void add_dquot_ref(kdev_t dev, short type)
{
struct super_block *sb = get_super(dev);
@@ -582,6 +667,7 @@
if (!sb || !sb->dq_op)
return; /* nothing to do */

+restart:
for (filp = inuse_filps; filp; filp = filp->f_next) {
if (!filp->f_dentry)
continue;
@@ -590,79 +676,97 @@
inode = filp->f_dentry->d_inode;
if (!inode)
continue;
- /* N.B. race problem -- filp could become unused */
- if (filp->f_mode & FMODE_WRITE) {
+ /* Didn't we already initialized this inode? */
+ if (filp->f_mode & FMODE_WRITE && dqinit_needed(inode, type)) {
sb->dq_op->initialize(inode, type);
inode->i_flags |= S_QUOTA;
+ /* as we may have blocked we had better restart */
+ goto restart;
}
}
}

+static int reset_inode_dquot_ptrs(struct inode *inode, short type)
+{
+ struct dquot *dquot = inode->i_dquot[type];
+ int cnt;
+
+ inode->i_dquot[type] = NODQUOT;
+ /* any other quota in use? */
+ for (cnt = 0; cnt < MAXQUOTAS; cnt++) {
+ if (inode->i_dquot[cnt] != NODQUOT)
+ goto put_it;
+ }
+ inode->i_flags &= ~S_QUOTA;
+put_it:
+ if (dquot != NODQUOT) {
+ spin_unlock(&inode_lock); /* We may block so drop the lock... */
+ dqput(dquot);
+ spin_lock(&inode_lock); /* And capture lock again */
+ /* we may have blocked ... */
+ return 1;
+ }
+ return 0;
+}
+
static void reset_dquot_ptrs(kdev_t dev, short type)
{
struct super_block *sb = get_super(dev);
- struct file *filp;
struct inode *inode;
- struct dquot *dquot;
- int cnt;
+ struct list_head *act_head;
+ int need_list = 3;

if (!sb || !sb->dq_op)
return; /* nothing to do */

-restart:
- /* free any quota for unused dentries */
- shrink_dcache_sb(sb);
+ /* We have to be protected against other CPUs */
+ spin_lock(&inode_lock);

- for (filp = inuse_filps; filp; filp = filp->f_next) {
- if (!filp->f_dentry)
- continue;
- if (filp->f_dentry->d_sb != sb)
- continue;
- inode = filp->f_dentry->d_inode;
- if (!inode)
- continue;
- /*
- * Note: we restart after each blocking operation,
- * as the inuse_filps list may have changed.
- */
- if (IS_QUOTAINIT(inode)) {
- dquot = inode->i_dquot[type];
- inode->i_dquot[type] = NODQUOT;
- /* any other quota in use? */
- for (cnt = 0; cnt < MAXQUOTAS; cnt++) {
- if (inode->i_dquot[cnt] != NODQUOT)
- goto put_it;
+ do {
+ if (need_list & 1) {
+ need_list &= ~1;
+ restart_in_use:
+ for (act_head = inode_in_use.next; act_head != &inode_in_use; act_head = act_head->next) {
+ inode = list_entry(act_head, struct inode, i_list);
+ if (inode->i_sb != sb || !IS_QUOTAINIT(inode))
+ continue;
+ if (reset_inode_dquot_ptrs(inode, type)) {
+ need_list |= 2;
+ goto restart_in_use;
+ }
}
- inode->i_flags &= ~S_QUOTA;
- put_it:
- if (dquot != NODQUOT) {
- dqput(dquot);
- /* we may have blocked ... */
- goto restart;
+ }
+ if (need_list & 2) {
+ need_list &= ~2;
+ restart_dirty:
+ for (act_head = sb->s_dirty.next; act_head != &sb->s_dirty; act_head = act_head->next) {
+ inode = list_entry(act_head, struct inode, i_list);
+ if (IS_QUOTAINIT(inode) && reset_inode_dquot_ptrs(inode, type)) {
+ need_list |= 1;
+ goto restart_dirty;
+ }
}
}
}
+ while (need_list);
+
+ spin_unlock(&inode_lock);
}

static inline void dquot_incr_inodes(struct dquot *dquot, unsigned long number)
{
- lock_dquot(dquot);
dquot->dq_curinodes += number;
dquot->dq_flags |= DQ_MOD;
- unlock_dquot(dquot);
}

static inline void dquot_incr_blocks(struct dquot *dquot, unsigned long number)
{
- lock_dquot(dquot);
dquot->dq_curblocks += number;
dquot->dq_flags |= DQ_MOD;
- unlock_dquot(dquot);
}

static inline void dquot_decr_inodes(struct dquot *dquot, unsigned long number)
{
- lock_dquot(dquot);
if (dquot->dq_curinodes > number)
dquot->dq_curinodes -= number;
else
@@ -671,12 +775,10 @@
dquot->dq_itime = (time_t) 0;
dquot->dq_flags &= ~DQ_INODES;
dquot->dq_flags |= DQ_MOD;
- unlock_dquot(dquot);
}

static inline void dquot_decr_blocks(struct dquot *dquot, unsigned long number)
{
- lock_dquot(dquot);
if (dquot->dq_curblocks > number)
dquot->dq_curblocks -= number;
else
@@ -685,7 +787,6 @@
dquot->dq_btime = (time_t) 0;
dquot->dq_flags &= ~DQ_BLKS;
dquot->dq_flags |= DQ_MOD;
- unlock_dquot(dquot);
}

static inline char need_print_warning(short type, uid_t initiator, struct dquot *dquot)
@@ -704,7 +805,7 @@
return(initiator == 0 && dquot->dq_mnt->mnt_dquot.rsquash[dquot->dq_type] == 0);
}

-static int check_idq(struct dquot *dquot, short type, u_long short inodes, uid_t initiator,
+static int check_idq(struct dquot *dquot, short type, u_long inodes, uid_t initiator,
struct tty_struct *tty)
{
if (inodes <= 0 || dquot->dq_flags & DQ_FAKE)
@@ -869,9 +970,11 @@
if (dquot == NODQUOT)
goto out;

+ lock_dquot(dquot); /* We must protect against invalidating the quota */
error = -EFAULT;
if (dqblk && !copy_to_user(dqblk, &dquot->dq_dqb, sizeof(struct dqblk)))
error = 0;
+ unlock_dquot(dquot);
dqput(dquot);
out:
return error;
@@ -963,6 +1066,8 @@
break;
}
dquot = dqget(inode->i_dev, id, cnt);
+ if (dquot == NODQUOT)
+ continue;
if (inode->i_dquot[cnt] != NODQUOT) {
dqput(dquot);
continue;
@@ -1000,23 +1105,36 @@
int dquot_alloc_block(const struct inode *inode, unsigned long number, uid_t initiator,
char warn)
{
- unsigned short cnt;
+ int cnt;
struct tty_struct *tty = current->tty;
+ struct dquot *dquot[MAXQUOTAS];

for (cnt = 0; cnt < MAXQUOTAS; cnt++) {
- if (inode->i_dquot[cnt] == NODQUOT)
+ dquot[cnt] = dqduplicate(inode->i_dquot[cnt]);
+ if (dquot[cnt] == NODQUOT)
continue;
- if (check_bdq(inode->i_dquot[cnt], cnt, number, initiator, tty, warn))
- return(NO_QUOTA);
+ lock_dquot(dquot[cnt]);
+ if (check_bdq(dquot[cnt], cnt, number, initiator, tty, warn))
+ goto put_all;
}

for (cnt = 0; cnt < MAXQUOTAS; cnt++) {
- if (inode->i_dquot[cnt] == NODQUOT)
+ if (dquot[cnt] == NODQUOT)
continue;
- dquot_incr_blocks(inode->i_dquot[cnt], number);
+ dquot_incr_blocks(dquot[cnt], number);
+ unlock_dquot(dquot[cnt]);
+ dqput(dquot[cnt]);
}

- return(QUOTA_OK);
+ return QUOTA_OK;
+put_all:
+ for (; cnt >= 0; cnt--) {
+ if (dquot[cnt] == NODQUOT)
+ continue;
+ unlock_dquot(dquot[cnt]);
+ dqput(dquot[cnt]);
+ }
+ return NO_QUOTA;
}

/*
@@ -1024,23 +1142,36 @@
*/
int dquot_alloc_inode(const struct inode *inode, unsigned long number, uid_t initiator)
{
- unsigned short cnt;
+ int cnt;
struct tty_struct *tty = current->tty;
+ struct dquot *dquot[MAXQUOTAS];

for (cnt = 0; cnt < MAXQUOTAS; cnt++) {
- if (inode->i_dquot[cnt] == NODQUOT)
+ dquot[cnt] = dqduplicate(inode -> i_dquot[cnt]);
+ if (dquot[cnt] == NODQUOT)
continue;
- if (check_idq(inode->i_dquot[cnt], cnt, number, initiator, tty))
- return(NO_QUOTA);
+ lock_dquot(dquot[cnt]);
+ if (check_idq(dquot[cnt], cnt, number, initiator, tty))
+ goto put_all;
}

for (cnt = 0; cnt < MAXQUOTAS; cnt++) {
- if (inode->i_dquot[cnt] == NODQUOT)
+ if (dquot[cnt] == NODQUOT)
continue;
- dquot_incr_inodes(inode->i_dquot[cnt], number);
+ dquot_incr_inodes(dquot[cnt], number);
+ unlock_dquot(dquot[cnt]);
+ dqput(dquot[cnt]);
}

- return(QUOTA_OK);
+ return QUOTA_OK;
+put_all:
+ for (; cnt >= 0; cnt--) {
+ if (dquot[cnt] == NODQUOT)
+ continue;
+ unlock_dquot(dquot[cnt]);
+ dqput(dquot[cnt]);
+ }
+ return NO_QUOTA;
}

/*
@@ -1049,11 +1180,14 @@
void dquot_free_block(const struct inode *inode, unsigned long number)
{
unsigned short cnt;
+ struct dquot *dquot;

for (cnt = 0; cnt < MAXQUOTAS; cnt++) {
- if (inode->i_dquot[cnt] == NODQUOT)
+ dquot = inode->i_dquot[cnt];
+ if (dquot == NODQUOT)
continue;
- dquot_decr_blocks(inode->i_dquot[cnt], number);
+ wait_on_dquot(dquot);
+ dquot_decr_blocks(dquot, number);
}
}

@@ -1063,11 +1197,14 @@
void dquot_free_inode(const struct inode *inode, unsigned long number)
{
unsigned short cnt;
+ struct dquot *dquot;

for (cnt = 0; cnt < MAXQUOTAS; cnt++) {
- if (inode->i_dquot[cnt] == NODQUOT)
+ dquot = inode->i_dquot[cnt];
+ if (dquot == NODQUOT)
continue;
- dquot_decr_inodes(inode->i_dquot[cnt], number);
+ wait_on_dquot(dquot);
+ dquot_decr_inodes(dquot, number);
}
}

@@ -1076,14 +1213,18 @@
*
* Note: this is a blocking operation.
*/
-int dquot_transfer(struct inode *inode, struct iattr *iattr, char direction, uid_t initiator)
+int dquot_transfer(struct dentry *dentry, struct iattr *iattr, uid_t initiator)
{
+ struct inode *inode = dentry -> d_inode;
unsigned long blocks;
struct dquot *transfer_from[MAXQUOTAS];
struct dquot *transfer_to[MAXQUOTAS];
struct tty_struct *tty = current->tty;
short cnt, disc;
+ int error = -EDQUOT;

+ if (!inode)
+ return -ENOENT;
/*
* Find out if this filesystem uses i_blocks.
*/
@@ -1107,27 +1248,60 @@
case USRQUOTA:
if (inode->i_uid == iattr->ia_uid)
continue;
- transfer_from[cnt] = dqget(inode->i_dev, (direction) ? iattr->ia_uid : inode->i_uid, cnt);
- transfer_to[cnt] = dqget(inode->i_dev, (direction) ? inode->i_uid : iattr->ia_uid, cnt);
+ /* We can get transfer_from from inode, can't we? */
+ transfer_from[cnt] = dqget(inode->i_dev, inode->i_uid, cnt);
+ transfer_to[cnt] = dqget(inode->i_dev, iattr->ia_uid, cnt);
break;
case GRPQUOTA:
if (inode->i_gid == iattr->ia_gid)
continue;
- transfer_from[cnt] = dqget(inode->i_dev, (direction) ? iattr->ia_gid : inode->i_gid, cnt);
- transfer_to[cnt] = dqget(inode->i_dev, (direction) ? inode->i_gid : iattr->ia_gid, cnt);
+ transfer_from[cnt] = dqget(inode->i_dev, inode->i_gid, cnt);
+ transfer_to[cnt] = dqget(inode->i_dev, iattr->ia_gid, cnt);
break;
}

- if (check_idq(transfer_to[cnt], cnt, 1, initiator, tty) == NO_QUOTA ||
- check_bdq(transfer_to[cnt], cnt, blocks, initiator, tty, 0) == NO_QUOTA) {
- for (disc = 0; disc <= cnt; disc++) {
- dqput(transfer_from[disc]);
- dqput(transfer_to[disc]);
+ /* Something bad (eg. quotaoff) happened while we were sleeping? */
+ if (transfer_from[cnt] == NODQUOT || transfer_to[cnt] == NODQUOT)
+ {
+ if (transfer_from[cnt] != NODQUOT) {
+ dqput(transfer_from[cnt]);
+ transfer_from[cnt] = NODQUOT;
+ }
+ if (transfer_to[cnt] != NODQUOT) {
+ dqput(transfer_to[cnt]);
+ transfer_to[cnt] = NODQUOT;
}
- return(NO_QUOTA);
+ continue;
+ }
+ /*
+ * We have to lock the quotas to prevent races...
+ */
+ if (transfer_from[cnt] < transfer_to[cnt])
+ {
+ lock_dquot(transfer_from[cnt]);
+ lock_dquot(transfer_to[cnt]);
+ }
+ else
+ {
+ lock_dquot(transfer_to[cnt]);
+ lock_dquot(transfer_from[cnt]);
+ }
+
+ /*
+ * The entries might got invalidated while locking. The second
+ * dqget() could block and so the first structure might got
+ * invalidated or locked...
+ */
+ if (!transfer_to[cnt]->dq_mnt || !transfer_from[cnt]->dq_mnt ||
+ check_idq(transfer_to[cnt], cnt, 1, initiator, tty) == NO_QUOTA ||
+ check_bdq(transfer_to[cnt], cnt, blocks, initiator, tty, 0) == NO_QUOTA) {
+ cnt++;
+ goto put_all;
}
}

+ if ((error = notify_change(dentry, iattr)))
+ goto put_all;
/*
* Finally perform the needed transfer from transfer_from to transfer_to,
* and release any pointers to dquots not needed anymore.
@@ -1139,28 +1313,39 @@
if (transfer_from[cnt] == NODQUOT && transfer_to[cnt] == NODQUOT)
continue;

- if (transfer_from[cnt] != NODQUOT) {
- dquot_decr_inodes(transfer_from[cnt], 1);
- dquot_decr_blocks(transfer_from[cnt], blocks);
- }
+ dquot_decr_inodes(transfer_from[cnt], 1);
+ dquot_decr_blocks(transfer_from[cnt], blocks);

- if (transfer_to[cnt] != NODQUOT) {
- dquot_incr_inodes(transfer_to[cnt], 1);
- dquot_incr_blocks(transfer_to[cnt], blocks);
- }
+ dquot_incr_inodes(transfer_to[cnt], 1);
+ dquot_incr_blocks(transfer_to[cnt], blocks);

+ unlock_dquot(transfer_from[cnt]);
+ dqput(transfer_from[cnt]);
if (inode->i_dquot[cnt] != NODQUOT) {
struct dquot *temp = inode->i_dquot[cnt];
inode->i_dquot[cnt] = transfer_to[cnt];
+ unlock_dquot(transfer_to[cnt]);
dqput(temp);
- dqput(transfer_from[cnt]);
} else {
- dqput(transfer_from[cnt]);
+ unlock_dquot(transfer_to[cnt]);
dqput(transfer_to[cnt]);
}
}

- return(QUOTA_OK);
+ return 0;
+put_all:
+ for (disc = 0; disc < cnt; disc++) {
+ /* There should be none or both pointers set but... */
+ if (transfer_to[disc] != NODQUOT) {
+ unlock_dquot(transfer_to[disc]);
+ dqput(transfer_to[disc]);
+ }
+ if (transfer_from[disc] != NODQUOT) {
+ unlock_dquot(transfer_from[disc]);
+ dqput(transfer_from[disc]);
+ }
+ }
+ return error;
}


@@ -1224,16 +1409,18 @@
struct vfsmount *vfsmnt;
struct file *filp;
short cnt;
+ int enabled = 0;

+ /* We don't need to search for vfsmnt each time - umount has to wait for us */
+ vfsmnt = lookup_vfsmnt(dev);
+ if (!vfsmnt || !vfsmnt->mnt_sb)
+ goto out;
+
+ /* We need to serialize quota_off() for device */
+ down(&vfsmnt->mnt_dquot.dqoff_sem);
for (cnt = 0; cnt < MAXQUOTAS; cnt++) {
if (type != -1 && cnt != type)
continue;
-
- vfsmnt = lookup_vfsmnt(dev);
- if (!vfsmnt)
- goto out;
- if (!vfsmnt->mnt_sb)
- goto out;
if (!is_enabled(vfsmnt, cnt))
continue;
reset_enable_flags(vfsmnt, cnt);
@@ -1242,26 +1429,25 @@
reset_dquot_ptrs(dev, cnt);
invalidate_dquots(dev, cnt);

+ /* Wait for any pending IO - remove me as soon as invalidate is more polite */
+ down(&vfsmnt->mnt_dquot.dqio_sem);
filp = vfsmnt->mnt_dquot.files[cnt];
vfsmnt->mnt_dquot.files[cnt] = (struct file *)NULL;
vfsmnt->mnt_dquot.inode_expire[cnt] = 0;
vfsmnt->mnt_dquot.block_expire[cnt] = 0;
+ up(&vfsmnt->mnt_dquot.dqio_sem);
fput(filp);
- }
+ }

/*
* Check whether any quota is still enabled,
* and if not clear the dq_op pointer.
*/
- vfsmnt = lookup_vfsmnt(dev);
- if (vfsmnt && vfsmnt->mnt_sb) {
- int enabled = 0;
- for (cnt = 0; cnt < MAXQUOTAS; cnt++)
- enabled |= is_enabled(vfsmnt, cnt);
- if (!enabled)
- vfsmnt->mnt_sb->dq_op = NULL;
- }
-
+ for (cnt = 0; cnt < MAXQUOTAS; cnt++)
+ enabled |= is_enabled(vfsmnt, cnt);
+ if (!enabled)
+ vfsmnt->mnt_sb->dq_op = NULL;
+ up(&vfsmnt->mnt_dquot.dqoff_sem);
out:
return(0);
}
@@ -1282,47 +1468,51 @@

if (is_enabled(vfsmnt, type))
return -EBUSY;
- mnt_dquot = &vfsmnt->mnt_dquot;

+ mnt_dquot = &vfsmnt->mnt_dquot;
+ down(&mnt_dquot->dqoff_sem);
tmp = getname(path);
error = PTR_ERR(tmp);
if (IS_ERR(tmp))
- return error;
+ goto out_lock;

f = filp_open(tmp, O_RDWR, 0600);
putname(tmp);
- if (IS_ERR(f))
- return PTR_ERR(f);

- /* sanity checks */
+ error = PTR_ERR(f);
+ if (IS_ERR(f))
+ goto out_lock;
error = -EIO;
if (!f->f_op->read && !f->f_op->write)
- goto cleanup;
+ goto out_f;
inode = f->f_dentry->d_inode;
error = -EACCES;
if (!S_ISREG(inode->i_mode))
- goto cleanup;
+ goto out_f;
error = -EINVAL;
if (inode->i_size == 0 || (inode->i_size % sizeof(struct dqblk)) != 0)
- goto cleanup;
+ goto out_f;

- /* OK, there we go */
set_enable_flags(vfsmnt, type);
mnt_dquot->files[type] = f;

dquot = dqget(dev, 0, type);
- mnt_dquot->inode_expire[type] = (dquot) ? dquot->dq_itime : MAX_IQ_TIME;
- mnt_dquot->block_expire[type] = (dquot) ? dquot->dq_btime : MAX_DQ_TIME;
+ mnt_dquot->inode_expire[type] = (dquot != NODQUOT) ? dquot->dq_itime : MAX_IQ_TIME;
+ mnt_dquot->block_expire[type] = (dquot != NODQUOT) ? dquot->dq_btime : MAX_DQ_TIME;
dqput(dquot);

vfsmnt->mnt_sb->dq_op = &dquot_operations;
add_dquot_ref(dev, type);

- return(0);
+ up(&mnt_dquot->dqoff_sem);
+ return 0;

-cleanup:
- fput(f);
- return error;
+out_f:
+ filp_close(f, NULL);
+out_lock:
+ up(&mnt_dquot->dqoff_sem);
+
+ return error;
}

/*
@@ -1343,6 +1533,9 @@

if ((u_int) type >= MAXQUOTAS)
goto out;
+ if (id & ~0xFFFF)
+ goto out;
+
ret = -EPERM;
switch (cmds) {
case Q_SYNC:
@@ -1350,7 +1543,7 @@
break;
case Q_GETQUOTA:
if (((type == USRQUOTA && current->euid != id) ||
- (type == GRPQUOTA && current->egid != id)) &&
+ (type == GRPQUOTA && in_group_p(id))) &&
!capable(CAP_SYS_RESOURCE))
goto out;
break;
@@ -1360,7 +1553,7 @@
}

ret = -EINVAL;
- dev = 0;
+ dev = NODEV;
if (special != NULL || (cmds != Q_SYNC && cmds != Q_GETSTATS)) {
mode_t mode;
struct dentry * dentry;
--- linux/fs/super.c Wed Jun 16 22:08:10 1999
+++ linux/fs/super.c Wed Jun 16 22:25:14 1999
@@ -104,7 +104,8 @@
lptr->mnt_dev = sb->s_dev;
lptr->mnt_flags = sb->s_flags;

- sema_init(&lptr->mnt_dquot.semaphore, 1);
+ sema_init(&lptr->mnt_dquot.dqio_sem, 1);
+ sema_init(&lptr->mnt_dquot.dqoff_sem, 1);
lptr->mnt_dquot.flags = 0;

/* N.B. Is it really OK to have a vfsmount without names? */
--- linux/fs/inode.c Wed Jun 16 21:55:30 1999
+++ linux/fs/inode.c Thu May 13 22:30:38 1999
@@ -44,7 +44,7 @@
* allowing for low-overhead inode sync() operations.
*/

-static LIST_HEAD(inode_in_use);
+LIST_HEAD(inode_in_use);
static LIST_HEAD(inode_unused);
static struct list_head inode_hashtable[HASH_SIZE];

--- linux/include/linux/quotaops.h Wed Jun 16 21:59:26 1999
+++ linux/include/linux/quotaops.h Thu May 13 22:31:12 1999
@@ -31,8 +31,8 @@
extern void dquot_free_block(const struct inode *inode, unsigned long number);
extern void dquot_free_inode(const struct inode *inode, unsigned long number);

-extern int dquot_transfer(struct inode *inode, struct iattr *iattr,
- char direction, uid_t initiator);
+extern int dquot_transfer(struct dentry *dentry, struct iattr *iattr,
+ uid_t initiator);

/*
* Operations supported for diskquotas.
@@ -101,15 +101,10 @@
if (dentry->d_inode->i_sb->dq_op) {
if (IS_QUOTAINIT(dentry->d_inode) == 0)
dentry->d_inode->i_sb->dq_op->initialize(dentry->d_inode, -1);
- if (dentry->d_inode->i_sb->dq_op->transfer(dentry->d_inode, iattr, 0, current->fsuid))
- goto out;
- error = notify_change(dentry, iattr);
- if (error)
- dentry->d_inode->i_sb->dq_op->transfer(dentry->d_inode, iattr, 1, current->fsuid);
+ error = dentry->d_inode->i_sb->dq_op->transfer(dentry, iattr, current->fsuid);
} else {
error = notify_change(dentry, iattr);
}
-out:
return error;
}

--- linux/include/linux/fs.h Wed Jun 16 21:59:00 1999
+++ linux/include/linux/fs.h Thu May 13 22:31:12 1999
@@ -639,7 +639,7 @@
int (*alloc_inode) (const struct inode *, unsigned long, uid_t);
void (*free_block) (const struct inode *, unsigned long);
void (*free_inode) (const struct inode *, unsigned long);
- int (*transfer) (struct inode *, struct iattr *, char, uid_t);
+ int (*transfer) (struct dentry *, struct iattr *, uid_t);
};

struct file_system_type {
--- linux/include/linux/mount.h Wed Jun 16 21:57:32 1999
+++ linux/include/linux/mount.h Mon May 24 22:55:13 1999
@@ -17,11 +17,12 @@
struct quota_mount_options
{
unsigned int flags; /* Flags for diskquotas on this device */
- struct semaphore semaphore; /* lock device while I/O in progress */
+ struct semaphore dqio_sem; /* lock device while I/O in progress */
+ struct semaphore dqoff_sem; /* serialize quota_off() and quota_on() on device */
struct file *files[MAXQUOTAS]; /* fp's to quotafiles */
time_t inode_expire[MAXQUOTAS]; /* expiretime for inode-quota */
time_t block_expire[MAXQUOTAS]; /* expiretime for block-quota */
- char rsquash[MAXQUOTAS]; /* for quotas threath root as any other user */
+ char rsquash[MAXQUOTAS]; /* for quotas threat root as any other user */
};

struct vfsmount

--XVdCdHGoSgWSkgj5--

-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@vger.rutgers.edu
Please read the FAQ at http://www.tux.org/lkml/