[PATCH] a fix for too big f_pos handling

From: KAMEZAWA Hiroyuki
Date: Wed Sep 30 2009 - 22:28:46 EST


Maybe there are several ways to handle this...this is an idea.

==
From: KAMEZAWA Hiroyuki <kamezawa.hiroyu@xxxxxxxxxxxxxx>

Currently, rw_verify_area() checks whether f_pos is negative and,
if it is, returns -EINVAL.

But some special files, such as /dev/(k)mem and /proc/<pid>/mem,
have negative offsets, and we can't access those offsets via
read/write on the file (device).

This patch introduces a flag, S_VERYBIG, and allows negative file
offsets for inodes that set it.

Changelog: v4->v5
- clean up patches for /dev/mem.
- rebased onto 2.6.32-rc1

Changelog: v3->v4
- make changes in mem.c aligned.
- change __negative_fpos_check() to return int.
- fixed bug in "pos" check.
- added comments.

Changelog: v2->v3
- fixed bug in rw_verify_area (it cannot be compiled)

Signed-off-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu@xxxxxxxxxxxxxx>
---
drivers/char/mem.c | 3 +++
fs/proc/base.c | 2 ++
fs/read_write.c | 22 ++++++++++++++++++++--
include/linux/fs.h | 2 ++
4 files changed, 27 insertions(+), 2 deletions(-)

Index: linux-2.6.32-rc1/fs/read_write.c
===================================================================
--- linux-2.6.32-rc1.orig/fs/read_write.c
+++ linux-2.6.32-rc1/fs/read_write.c
@@ -205,6 +205,21 @@ bad:
}
#endif

+static int
+__negative_fpos_check(struct inode *inode, loff_t pos, size_t count)
+{
+ /*
+ * pos or pos+count is negative here; reject wrap-around first.
+ * A too-big "count" is rejected by rw_verify_area() itself.
+ */
+ if ((pos < 0) && (pos + count < pos))
+ return -EOVERFLOW;
+ /* Negative pos (or pos+count) is allowed only for VERYBIG inodes */
+ if (!IS_VERYBIG(inode))
+ return -EINVAL;
+ return 0;
+}
+
/*
* rw_verify_area doesn't like huge counts. We limit
* them to something that fits in "int" so that others
@@ -222,8 +237,11 @@ int rw_verify_area(int read_write, struc
if (unlikely((ssize_t) count < 0))
return retval;
pos = *ppos;
- if (unlikely((pos < 0) || (loff_t) (pos + count) < 0))
- return retval;
+ if (unlikely((pos < 0) || (loff_t) (pos + count) < 0)) {
+ retval = __negative_fpos_check(inode, pos, count);
+ if (retval)
+ return retval;
+ }

if (unlikely(inode->i_flock && mandatory_lock(inode))) {
retval = locks_mandatory_area(
Index: linux-2.6.32-rc1/include/linux/fs.h
===================================================================
--- linux-2.6.32-rc1.orig/include/linux/fs.h
+++ linux-2.6.32-rc1/include/linux/fs.h
@@ -231,6 +231,7 @@ struct inodes_stat_t {
#define S_NOCMTIME 128 /* Do not update file c/mtime */
#define S_SWAPFILE 256 /* Do not truncate: swapon got its bmaps */
#define S_PRIVATE 512 /* Inode is fs-internal */
+#define S_VERYBIG 1024 /* File offsets (loff_t) may be negative */

/*
* Note that nosuid etc flags are inode-specific: setting some file-system
@@ -265,6 +266,7 @@ struct inodes_stat_t {
#define IS_NOCMTIME(inode) ((inode)->i_flags & S_NOCMTIME)
#define IS_SWAPFILE(inode) ((inode)->i_flags & S_SWAPFILE)
#define IS_PRIVATE(inode) ((inode)->i_flags & S_PRIVATE)
+#define IS_VERYBIG(inode) ((inode)->i_flags & S_VERYBIG)

/* the read-only stuff doesn't really belong here, but any other place is
probably as bad and I don't want to create yet another include file. */
Index: linux-2.6.32-rc1/drivers/char/mem.c
===================================================================
--- linux-2.6.32-rc1.orig/drivers/char/mem.c
+++ linux-2.6.32-rc1/drivers/char/mem.c
@@ -912,6 +912,9 @@ static int memory_open(struct inode *ino
ret = dev->fops->open(inode, filp);
else
ret = 0;
+ /* Is /dev/mem or /dev/kmem ? */
+ if (dev->dev_info == &directly_mappable_cdev_bdi)
+ inode->i_flags |= S_VERYBIG;
out:
unlock_kernel();
return ret;
Index: linux-2.6.32-rc1/fs/proc/base.c
===================================================================
--- linux-2.6.32-rc1.orig/fs/proc/base.c
+++ linux-2.6.32-rc1/fs/proc/base.c
@@ -778,6 +778,8 @@ static const struct file_operations proc
static int mem_open(struct inode* inode, struct file* file)
{
file->private_data = (void*)((long)current->self_exec_id);
+ /* this file is read only and we can catch out-of-range accesses */
+ inode->i_flags |= S_VERYBIG;
return 0;
}


--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/