REISER4: tracking down opps in reiser4_do_readpage_extent

From: Ignatich
Date: Fri Apr 06 2007 - 02:53:39 EST


Hi,

I've been trying to narrow down the possible sources of oppses in reiser4_do_readpage_extent (vs-1426, nikita-2688) patch by patch. Reverting reiser4-use-generic-file-read.patch & friends didn't have any effect, downgrading kernel from 2.6.21-rc5 to 2.6.20 too. Eventually it all came down to 0046-reiser4-drop-unused-semaphores.patch. I don't have a reliable way to trigger the bug, but one of my vmware sandboxes with reiser4 root always oppses when I do emerge binutils right after boot. With this patch reverted it no longer happens. **

You can find original patch at:
http://laurent.riffard.free.fr/reiser4/reiser4-for-2.6.21-rc1/broken-out/

I attached patch that adds mutex_write back to reiser4_inode, but as real mutex now instead of semaphore. Cryptcompress rw semaphore is not added back. Updated reiser4-fix-write_extent.patch with proper identation is also attached.

Perhaps this is not a proper fix and only hides real culprit, but at least I do not see oppses now. I do not yet know if this also solves problem with zeroed out files.

Max diff -u -r --exclude='*.cmd' --exclude='*.o' --exclude='*.orig' --exclude='*.rej' linux-2.6.21-rc4-git7/fs/reiser4/inode.h linux-2.6.20.4/fs/reiser4/inode.h
--- linux-2.6.21-rc4-git7/fs/reiser4/inode.h 2007-03-23 14:40:53.445156464 +0300
+++ linux-2.6.20.4/fs/reiser4/inode.h 2007-04-06 04:02:40.000000000 +0400
@@ -136,6 +136,17 @@
cryptcompress_info_t cryptcompress_info;
} file_plugin_data;

+ /* this semaphore is used to serialize writes of any file plugin,
+ * and should be invariant during file plugin conversion (which
+ * is going in the context of ->write()).
+ * inode->i_mutex can not be used for the serialization, because
+ * write_unix_file uses get_user_pages which is to be used under
+ * mm->mmap_sem and because it is required to take mm->mmap_sem before
+ * inode->i_mutex, so inode->i_mutex would have to be up()-ed before
+ * calling to get_user_pages which is unacceptable.
+ */
+ struct mutex mutex_write;
+
/* this semaphore is to serialize readers and writers of @pset->file
* when file plugin conversion is enabled
*/
diff -u -r --exclude='*.cmd' --exclude='*.o' --exclude='*.orig' --exclude='*.rej' linux-2.6.21-rc4-git7/fs/reiser4/plugin/file/file.c linux-2.6.20.4/fs/reiser4/plugin/file/file.c
--- linux-2.6.21-rc4-git7/fs/reiser4/plugin/file/file.c 2007-03-24 01:14:45.000000000 +0300
+++ linux-2.6.20.4/fs/reiser4/plugin/file/file.c 2007-04-06 04:02:40.000000000 +0400
@@ -1937,6 +1933,7 @@

uf_info = unix_file_inode_data(inode);

+ mutex_lock(&reiser4_inode_data(inode)->mutex_write);
get_exclusive_access(uf_info);

if (!IS_RDONLY(inode) && (vma->vm_flags & (VM_MAYWRITE | VM_SHARED))) {
@@ -1948,6 +1945,7 @@
result = find_file_state(inode, uf_info);
if (result != 0) {
drop_exclusive_access(uf_info);
+ mutex_unlock(&reiser4_inode_data(inode)->mutex_write);
reiser4_exit_context(ctx);
return result;
}
@@ -1963,6 +1961,7 @@
result = check_pages_unix_file(file, inode);
if (result) {
drop_exclusive_access(uf_info);
+ mutex_unlock(&reiser4_inode_data(inode)->mutex_write);
reiser4_exit_context(ctx);
return result;
}
@@ -1977,6 +1976,7 @@
result = reiser4_grab_space_force(needed, BA_CAN_COMMIT);
if (result) {
drop_exclusive_access(uf_info);
+ mutex_unlock(&reiser4_inode_data(inode)->mutex_write);
reiser4_exit_context(ctx);
return result;
}
@@ -1988,6 +1988,7 @@
}

drop_exclusive_access(uf_info);
+ mutex_unlock(&reiser4_inode_data(inode)->mutex_write);
reiser4_exit_context(ctx);
return result;
}
@@ -2369,6 +2370,7 @@
if (in_reiser4 == 0) {
uf_info = unix_file_inode_data(inode);

+ mutex_lock(&reiser4_inode_data(inode)->mutex_write);
get_exclusive_access(uf_info);
if (atomic_read(&file->f_dentry->d_count) == 1 &&
uf_info->container == UF_CONTAINER_EXTENTS &&
@@ -2384,6 +2386,7 @@
}
}
drop_exclusive_access(uf_info);
+ mutex_unlock(&reiser4_inode_data(inode)->mutex_write);
} else {
/*
we are within reiser4 context already. How latter is
@@ -2678,9 +2681,11 @@
return PTR_ERR(ctx);

uf_info = unix_file_inode_data(dentry->d_inode);
+ mutex_lock(&reiser4_inode_data(dentry->d_inode)->mutex_write);
get_exclusive_access(uf_info);
result = setattr_truncate(dentry->d_inode, attr);
drop_exclusive_access(uf_info);
+ mutex_unlock(&reiser4_inode_data(dentry->d_inode)->mutex_write);
context_set_commit_async(ctx);
reiser4_exit_context(ctx);
} else
diff -u -r --exclude='*.cmd' --exclude='*.o' --exclude='*.orig' --exclude='*.rej' linux-2.6.21-rc4-git7/fs/reiser4/super_ops.c linux-2.6.20.4/fs/reiser4/super_ops.c
--- linux-2.6.21-rc4-git7/fs/reiser4/super_ops.c 2007-03-23 14:40:53.818099768 +0300
+++ linux-2.6.20.4/fs/reiser4/super_ops.c 2007-04-06 04:02:40.000000000 +0400
@@ -44,6 +44,7 @@
* etc. that will be added to our private inode part.
*/
INIT_LIST_HEAD(get_readdir_list(&info->vfs_inode));
+ mutex_init(&info->p.mutex_write);
init_rwsem(&info->p.conv_sem);
/* init semaphore which is used during inode loading */
loading_init_once(&info->p);
--- original/fs/reiser4/plugin/file/cryptcompress.c 2007-04-06 08:00:28.000000000 +0400
+++ current/fs/reiser4/plugin/file/cryptcompress.c 2007-04-06 10:20:25.000000000 +0400
@@ -2783,7 +2783,7 @@
if (IS_ERR(ctx))
return PTR_ERR(ctx);

- mutex_lock(&inode->i_mutex);
+ mutex_lock(&reiser4_inode_data(inode)->mutex_write);

result = generic_write_checks(file, &pos, &count, 0);
if (unlikely(result != 0))
@@ -2803,7 +2803,7 @@
/* update position in a file */
*off = pos + result;
out:
- mutex_unlock(&inode->i_mutex);
+ mutex_unlock(&reiser4_inode_data(inode)->mutex_write);

context_set_commit_async(ctx);
reiser4_exit_context(ctx);
--- original/fs/reiser4/plugin/item/extent_file_ops.c 2007-04-06 08:00:28.000000000 +0400
+++ current/fs/reiser4/plugin/item/extent_file_ops.c 2007-04-06 09:23:45.000000000 +0400
@@ -941,15 +941,15 @@
* reiser4_write_extent - write method of extent item plugin
* @file: file to write to
* @buf: address of user-space buffer
- * @write_amount: number of bytes to write
- * @off: position in file to write to
+ * @count: number of bytes to write
+ * @pos: position in file to write to
*
*/
ssize_t reiser4_write_extent(struct file *file, const char __user *buf,
size_t count, loff_t *pos)
{
int have_to_update_extent;
- int nr_pages;
+ int nr_pages, nr_dirty;
struct page *page;
jnode *jnodes[WRITE_GRANULARITY + 1];
struct inode *inode;
@@ -958,7 +958,7 @@
int i;
int to_page, page_off;
size_t left, written;
- int result;
+ int result = 0;

inode = file->f_dentry->d_inode;
if (write_extent_reserve_space(inode))
@@ -972,10 +972,12 @@

BUG_ON(get_current_context()->trans->atom != NULL);

+ left = count;
index = *pos >> PAGE_CACHE_SHIFT;
/* calculate number of pages which are to be written */
end = ((*pos + count - 1) >> PAGE_CACHE_SHIFT);
nr_pages = end - index + 1;
+ nr_dirty = 0;
assert("", nr_pages <= WRITE_GRANULARITY + 1);

/* get pages and jnodes */
@@ -983,22 +985,18 @@
page = find_or_create_page(inode->i_mapping, index + i,
reiser4_ctx_gfp_mask_get());
if (page == NULL) {
- while(i --) {
- unlock_page(jnode_page(jnodes[i]));
- page_cache_release(jnode_page(jnodes[i]));
- }
- return RETERR(-ENOMEM);
+ nr_pages = i;
+ result = RETERR(-ENOMEM);
+ goto out;
}

jnodes[i] = jnode_of_page(page);
if (IS_ERR(jnodes[i])) {
unlock_page(page);
page_cache_release(page);
- while (i --) {
- jput(jnodes[i]);
- page_cache_release(jnode_page(jnodes[i]));
- }
- return RETERR(-ENOMEM);
+ nr_pages = i;
+ result = RETERR(-ENOMEM);
+ goto out;
}
/* prevent jnode and page from disconnecting */
JF_SET(jnodes[i], JNODE_WRITE_PREPARED);
@@ -1009,7 +1007,6 @@

have_to_update_extent = 0;

- left = count;
page_off = (*pos & (PAGE_CACHE_SIZE - 1));
for (i = 0; i < nr_pages; i ++) {
to_page = PAGE_CACHE_SIZE - page_off;
@@ -1052,12 +1049,19 @@
}

written = filemap_copy_from_user(page, page_off, buf, to_page);
+ if (unlikely(written != to_page)) {
+ unlock_page(page);
+ result = RETERR(-EFAULT);
+ break;
+ }
+
flush_dcache_page(page);
reiser4_set_page_dirty_internal(page);
unlock_page(page);
+ nr_dirty++;
+
mark_page_accessed(page);
SetPageUptodate(page);
- page_cache_release(page);

if (jnodes[i]->blocknr == 0)
have_to_update_extent ++;
@@ -1069,25 +1073,29 @@
}

if (have_to_update_extent) {
- update_extents(file, jnodes, nr_pages, *pos);
+ update_extents(file, jnodes, nr_dirty, *pos);
} else {
- for (i = 0; i < nr_pages; i ++) {
+ for (i = 0; i < nr_dirty; i ++) {
+ int ret;
spin_lock_jnode(jnodes[i]);
- result = reiser4_try_capture(jnodes[i],
+ ret = reiser4_try_capture(jnodes[i],
ZNODE_WRITE_LOCK, 0);
- BUG_ON(result != 0);
+ BUG_ON(ret != 0);
jnode_make_dirty_locked(jnodes[i]);
spin_unlock_jnode(jnodes[i]);
}
}
-
+out:
for (i = 0; i < nr_pages; i ++) {
+ page_cache_release(jnode_page(jnodes[i]));
JF_CLR(jnodes[i], JNODE_WRITE_PREPARED);
jput(jnodes[i]);
}

- /* the only error handled so far is EFAULT on copy_from_user */
- return (count - left) ? (count - left) : -EFAULT;
+ /* the only errors handled so far is ENOMEM and
+ EFAULT on copy_from_user */
+
+ return (count - left) ? (count - left) : result;
}

static inline void zero_page(struct page *page)