[PATCH 11/49] ext4: Add support for 48 bit inode i_blocks.

From: Theodore Ts'o
Date: Mon Jan 21 2008 - 22:15:39 EST


From: Aneesh Kumar K.V <aneesh.kumar@xxxxxxxxxxxxxxxxxx>

Use the __le16 l_i_reserved1 field of the linux2 struct of ext4_inode
to represet the higher 16 bits for i_blocks. With this change max_file
size becomes (2**48 -1 )* 512 bytes.

We add a RO_COMPAT feature to the super block to indicate that inode
have i_blocks represented as a split 48 bits. Super block with this
feature set cannot be mounted read write on a kernel with CONFIG_LSF
disabled.

Super block flag EXT4_FEATURE_RO_COMPAT_HUGE_FILE

Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@xxxxxxxxxxxxxxxxxx>
---
fs/ext4/inode.c | 58 ++++++++++++++++++++++++++++++++++++++++++-
fs/ext4/super.c | 62 ++++++++++++++++++++++++++++++++++++++++++----
include/linux/ext4_fs.h | 10 +++++--
3 files changed, 119 insertions(+), 11 deletions(-)

diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index e663455..bb89fe7 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -2667,6 +2667,22 @@ void ext4_get_inode_flags(struct ext4_inode_info *ei)
if (flags & S_DIRSYNC)
ei->i_flags |= EXT4_DIRSYNC_FL;
}
+static blkcnt_t ext4_inode_blocks(struct ext4_inode *raw_inode,
+ struct ext4_inode_info *ei)
+{
+ blkcnt_t i_blocks ;
+ struct super_block *sb = ei->vfs_inode.i_sb;
+
+ if (EXT4_HAS_RO_COMPAT_FEATURE(sb,
+ EXT4_FEATURE_RO_COMPAT_HUGE_FILE)) {
+ /* we are using combined 48 bit field */
+ i_blocks = ((u64)le16_to_cpu(raw_inode->i_blocks_high)) << 32 |
+ le32_to_cpu(raw_inode->i_blocks_lo);
+ return i_blocks;
+ } else {
+ return le32_to_cpu(raw_inode->i_blocks_lo);
+ }
+}

void ext4_read_inode(struct inode * inode)
{
@@ -2715,8 +2731,8 @@ void ext4_read_inode(struct inode * inode)
* recovery code: that's fine, we're about to complete
* the process of deleting those. */
}
- inode->i_blocks = le32_to_cpu(raw_inode->i_blocks);
ei->i_flags = le32_to_cpu(raw_inode->i_flags);
+ inode->i_blocks = ext4_inode_blocks(raw_inode, ei);
ei->i_file_acl = le32_to_cpu(raw_inode->i_file_acl_lo);
if (EXT4_SB(inode->i_sb)->s_es->s_creator_os !=
cpu_to_le32(EXT4_OS_HURD)) {
@@ -2799,6 +2815,43 @@ bad_inode:
return;
}

+static int ext4_inode_blocks_set(handle_t *handle,
+ struct ext4_inode *raw_inode,
+ struct ext4_inode_info *ei)
+{
+ struct inode *inode = &(ei->vfs_inode);
+ u64 i_blocks = inode->i_blocks;
+ struct super_block *sb = inode->i_sb;
+ int err = 0;
+
+ if (i_blocks <= ~0U) {
+ /*
+ * i_blocks can be represnted in a 32 bit variable
+ * as multiple of 512 bytes
+ */
+ raw_inode->i_blocks_lo = cpu_to_le32((u32)i_blocks);
+ raw_inode->i_blocks_high = 0;
+ } else if (i_blocks <= 0xffffffffffffULL) {
+ /*
+ * i_blocks can be represented in a 48 bit variable
+ * as multiple of 512 bytes
+ */
+ err = ext4_update_rocompat_feature(handle, sb,
+ EXT4_FEATURE_RO_COMPAT_HUGE_FILE);
+ if (err)
+ goto err_out;
+ /* i_block is stored in the split 48 bit fields */
+ raw_inode->i_blocks_lo = cpu_to_le32((u32)i_blocks);
+ raw_inode->i_blocks_high = cpu_to_le16(i_blocks >> 32);
+ } else {
+ ext4_error(sb, __FUNCTION__,
+ "Wrong inode i_blocks count %llu\n",
+ (unsigned long long)inode->i_blocks);
+ }
+err_out:
+ return err;
+}
+
/*
* Post the struct inode info into an on-disk inode location in the
* buffer-cache. This gobbles the caller's reference to the
@@ -2853,7 +2906,8 @@ static int ext4_do_update_inode(handle_t *handle,
EXT4_INODE_SET_XTIME(i_atime, inode, raw_inode);
EXT4_EINODE_SET_XTIME(i_crtime, ei, raw_inode);

- raw_inode->i_blocks = cpu_to_le32(inode->i_blocks);
+ if (ext4_inode_blocks_set(handle, raw_inode, ei))
+ goto out_brelse;
raw_inode->i_dtime = cpu_to_le32(ei->i_dtime);
raw_inode->i_flags = cpu_to_le32(ei->i_flags);
if (EXT4_SB(inode->i_sb)->s_es->s_creator_os !=
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index 7be27db..2b9dc96 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -1603,17 +1603,50 @@ static void ext4_orphan_cleanup (struct super_block * sb,

/*
* Maximal file size. There is a direct, and {,double-,triple-}indirect
- * block limit, and also a limit of (2^32 - 1) 512-byte sectors in i_blocks.
- * We need to be 1 filesystem block less than the 2^32 sector limit.
+ * block limit, and also a limit of (2^48 - 1) 512-byte sectors in i_blocks.
+ * We need to be 1 filesystem block less than the 2^48 sector limit.
*/
static loff_t ext4_max_size(int bits)
{
loff_t res = EXT4_NDIR_BLOCKS;
- /* This constant is calculated to be the largest file size for a
- * dense, 4k-blocksize file such that the total number of
+ int meta_blocks;
+ loff_t upper_limit;
+ /* This is calculated to be the largest file size for a
+ * dense, file such that the total number of
* sectors in the file, including data and all indirect blocks,
- * does not exceed 2^32. */
- const loff_t upper_limit = 0x1ff7fffd000LL;
+ * does not exceed 2^48 -1
+ * __u32 i_blocks_lo and _u16 i_blocks_high representing the
+ * total number of 512 bytes blocks of the file
+ */
+
+ if (sizeof(blkcnt_t) < sizeof(u64)) {
+ /*
+ * CONFIG_LSF is not enabled implies the inode
+ * i_block represent total blocks in 512 bytes
+ * 32 == size of vfs inode i_blocks * 8
+ */
+ upper_limit = (1LL << 32) - 1;
+
+ /* total blocks in file system block size */
+ upper_limit >>= (bits - 9);
+
+ } else {
+ /* We use 48 bit ext4_inode i_blocks */
+ upper_limit = (1LL << 48) - 1;
+
+ /* total blocks in file system block size */
+ upper_limit >>= (bits - 9);
+ }
+
+ /* indirect blocks */
+ meta_blocks = 1;
+ /* double indirect blocks */
+ meta_blocks += 1 + (1LL << (bits-2));
+ /* tripple indirect blocks */
+ meta_blocks += 1 + (1LL << (bits-2)) + (1LL << (2*(bits-2)));
+
+ upper_limit -= meta_blocks;
+ upper_limit <<= bits;

res += 1LL << (bits-2);
res += 1LL << (2*(bits-2));
@@ -1621,6 +1654,10 @@ static loff_t ext4_max_size(int bits)
res <<= bits;
if (res > upper_limit)
res = upper_limit;
+
+ if (res > MAX_LFS_FILESIZE)
+ res = MAX_LFS_FILESIZE;
+
return res;
}

@@ -1789,6 +1826,19 @@ static int ext4_fill_super (struct super_block *sb, void *data, int silent)
sb->s_id, le32_to_cpu(features));
goto failed_mount;
}
+ if (EXT4_HAS_RO_COMPAT_FEATURE(sb, EXT4_FEATURE_RO_COMPAT_HUGE_FILE)) {
+ /*
+ * Large file size enabled file system can only be
+ * mount if kernel is build with CONFIG_LSF
+ */
+ if (sizeof(root->i_blocks) < sizeof(u64) &&
+ !(sb->s_flags & MS_RDONLY)) {
+ printk(KERN_ERR "EXT4-fs: %s: Filesystem with huge "
+ "files cannot be mounted read-write "
+ "without CONFIG_LSF.\n", sb->s_id);
+ goto failed_mount;
+ }
+ }
blocksize = BLOCK_SIZE << le32_to_cpu(es->s_log_block_size);

if (blocksize < EXT4_MIN_BLOCK_SIZE ||
diff --git a/include/linux/ext4_fs.h b/include/linux/ext4_fs.h
index a8f3fae..be25eca 100644
--- a/include/linux/ext4_fs.h
+++ b/include/linux/ext4_fs.h
@@ -282,7 +282,7 @@ struct ext4_inode {
__le32 i_dtime; /* Deletion Time */
__le16 i_gid; /* Low 16 bits of Group Id */
__le16 i_links_count; /* Links count */
- __le32 i_blocks; /* Blocks count */
+ __le32 i_blocks_lo; /* Blocks count */
__le32 i_flags; /* File flags */
union {
struct {
@@ -302,7 +302,7 @@ struct ext4_inode {
__le32 i_obso_faddr; /* Obsoleted fragment address */
union {
struct {
- __le16 l_i_reserved1; /* Obsoleted fragment number/size which are removed in ext4 */
+ __le16 l_i_blocks_high; /* were l_i_reserved1 */
__le16 l_i_file_acl_high;
__le16 l_i_uid_high; /* these 2 fields */
__le16 l_i_gid_high; /* were reserved2[0] */
@@ -404,6 +404,7 @@ do { \
#if defined(__KERNEL__) || defined(__linux__)
#define i_reserved1 osd1.linux1.l_i_reserved1
#define i_file_acl_high osd2.linux2.l_i_file_acl_high
+#define i_blocks_high osd2.linux2.l_i_blocks_high
#define i_uid_low i_uid
#define i_gid_low i_gid
#define i_uid_high osd2.linux2.l_i_uid_high
@@ -670,6 +671,7 @@ static inline int ext4_valid_inum(struct super_block *sb, unsigned long ino)
#define EXT4_FEATURE_RO_COMPAT_SPARSE_SUPER 0x0001
#define EXT4_FEATURE_RO_COMPAT_LARGE_FILE 0x0002
#define EXT4_FEATURE_RO_COMPAT_BTREE_DIR 0x0004
+#define EXT4_FEATURE_RO_COMPAT_HUGE_FILE 0x0008
#define EXT4_FEATURE_RO_COMPAT_GDT_CSUM 0x0010
#define EXT4_FEATURE_RO_COMPAT_DIR_NLINK 0x0020
#define EXT4_FEATURE_RO_COMPAT_EXTRA_ISIZE 0x0040
@@ -681,6 +683,7 @@ static inline int ext4_valid_inum(struct super_block *sb, unsigned long ino)
#define EXT4_FEATURE_INCOMPAT_META_BG 0x0010
#define EXT4_FEATURE_INCOMPAT_EXTENTS 0x0040 /* extents support */
#define EXT4_FEATURE_INCOMPAT_64BIT 0x0080
+#define EXT4_FEATURE_INCOMPAT_MMP 0x0100
#define EXT4_FEATURE_INCOMPAT_FLEX_BG 0x0200

#define EXT4_FEATURE_COMPAT_SUPP EXT2_FEATURE_COMPAT_EXT_ATTR
@@ -695,7 +698,8 @@ static inline int ext4_valid_inum(struct super_block *sb, unsigned long ino)
EXT4_FEATURE_RO_COMPAT_GDT_CSUM| \
EXT4_FEATURE_RO_COMPAT_DIR_NLINK | \
EXT4_FEATURE_RO_COMPAT_EXTRA_ISIZE | \
- EXT4_FEATURE_RO_COMPAT_BTREE_DIR)
+ EXT4_FEATURE_RO_COMPAT_BTREE_DIR |\
+ EXT4_FEATURE_RO_COMPAT_HUGE_FILE)

/*
* Default values for user and/or group using reserved blocks
--
1.5.4.rc3.31.g1271-dirty

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/