[PATCH v4 1/2] hugetlb: use f_mode & FMODE_HUGETLBFS to identify hugetlbfs files

From: Mike Kravetz
Date: Thu Jun 11 2020 - 20:52:30 EST


The routine is_file_hugepages() checks f_op == &hugetlbfs_file_operations
to determine if the file resides in hugetlbfs. This is problematic when
the file is on a union or overlay filesystem. Instead, define a new file
mode FMODE_HUGETLBFS which is set when a hugetlbfs file is opened. The
mode can easily be copied to other 'files' derived from the original
hugetlbfs file.

With this change, hugetlbfs_file_operations can be static, as it should be.

There is also a (duplicate) set of shm file operations which is kept only
for the sake of is_file_shm_hugepages(). Instead of setting/using special
f_ops, just propagate the FMODE_HUGETLBFS mode. This means
is_file_shm_hugepages() and the duplicate f_ops can be removed.

While cleaning things up, change the name of is_file_hugepages() to
is_file_hugetlbfs(). The term hugepages is a bit ambiguous.

A subsequent patch will propagate FMODE_HUGETLBFS in overlayfs.

Suggested-by: Al Viro <viro@xxxxxxxxxxxxxxxxxx>
Signed-off-by: Mike Kravetz <mike.kravetz@xxxxxxxxxx>
---
fs/hugetlbfs/inode.c | 7 +++++++
fs/io_uring.c | 2 +-
include/linux/fs.h | 3 +++
include/linux/hugetlb.h | 10 ++++------
include/linux/shm.h | 5 -----
ipc/shm.c | 34 ++++++++--------------------------
mm/memfd.c | 2 +-
mm/mmap.c | 8 ++++----
8 files changed, 28 insertions(+), 43 deletions(-)

diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c
index 991c60c7ffe0..5c0c50a88c84 100644
--- a/fs/hugetlbfs/inode.c
+++ b/fs/hugetlbfs/inode.c
@@ -324,6 +324,12 @@ static ssize_t hugetlbfs_read_iter(struct kiocb *iocb, struct iov_iter *to)
return retval;
}

+static int hugetlbfs_open(struct inode *inode, struct file *file)
+{
+ file->f_mode |= FMODE_HUGETLBFS;
+ return 0;
+}
+
static int hugetlbfs_write_begin(struct file *file,
struct address_space *mapping,
loff_t pos, unsigned len, unsigned flags,
@@ -1112,6 +1118,7 @@ static void init_once(void *foo)

const struct file_operations hugetlbfs_file_operations = {
.read_iter = hugetlbfs_read_iter,
+ .open = hugetlbfs_open,
.mmap = hugetlbfs_file_mmap,
.fsync = noop_fsync,
.get_unmapped_area = hugetlb_get_unmapped_area,
diff --git a/fs/io_uring.c b/fs/io_uring.c
index bb25e3997d41..96e8a4bb610a 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -7123,7 +7123,7 @@ static int io_sqe_buffer_register(struct io_ring_ctx *ctx, void __user *arg,
struct vm_area_struct *vma = vmas[j];

if (vma->vm_file &&
- !is_file_hugepages(vma->vm_file)) {
+ !is_file_hugetlbfs(vma->vm_file)) {
ret = -EOPNOTSUPP;
break;
}
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 45cc10cdf6dd..99af9513f9ab 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -175,6 +175,9 @@ typedef int (dio_iodone_t)(struct kiocb *iocb, loff_t offset,
/* File does not contribute to nr_files count */
#define FMODE_NOACCOUNT ((__force fmode_t)0x20000000)

+/* File is in hugetlbfs filesystem */
+#define FMODE_HUGETLBFS ((__force fmode_t)0x40000000)
+
/*
* Flag for rw_copy_check_uvector and compat_rw_copy_check_uvector
* that indicates that they should check the contents of the iovec are
diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h
index 43a1cef8f0f1..aa3408775464 100644
--- a/include/linux/hugetlb.h
+++ b/include/linux/hugetlb.h
@@ -429,18 +429,16 @@ static inline struct hugetlbfs_inode_info *HUGETLBFS_I(struct inode *inode)
return container_of(inode, struct hugetlbfs_inode_info, vfs_inode);
}

-extern const struct file_operations hugetlbfs_file_operations;
extern const struct vm_operations_struct hugetlb_vm_ops;
struct file *hugetlb_file_setup(const char *name, size_t size, vm_flags_t acct,
struct user_struct **user, int creat_flags,
int page_size_log);

-static inline bool is_file_hugepages(struct file *file)
+static inline bool is_file_hugetlbfs(struct file *file)
{
- if (file->f_op == &hugetlbfs_file_operations)
+ if (unlikely(file->f_mode & FMODE_HUGETLBFS))
return true;
-
- return is_file_shm_hugepages(file);
+ return false;
}

static inline struct hstate *hstate_inode(struct inode *i)
@@ -449,7 +447,7 @@ static inline struct hstate *hstate_inode(struct inode *i)
}
#else /* !CONFIG_HUGETLBFS */

-#define is_file_hugepages(file) false
+#define is_file_hugetlbfs(file) false
static inline struct file *
hugetlb_file_setup(const char *name, size_t size, vm_flags_t acctflag,
struct user_struct **user, int creat_flags,
diff --git a/include/linux/shm.h b/include/linux/shm.h
index d8e69aed3d32..1ab62d7b334f 100644
--- a/include/linux/shm.h
+++ b/include/linux/shm.h
@@ -16,7 +16,6 @@ struct sysv_shm {

long do_shmat(int shmid, char __user *shmaddr, int shmflg, unsigned long *addr,
unsigned long shmlba);
-bool is_file_shm_hugepages(struct file *file);
void exit_shm(struct task_struct *task);
#define shm_init_task(task) INIT_LIST_HEAD(&(task)->sysvshm.shm_clist)
#else
@@ -30,10 +29,6 @@ static inline long do_shmat(int shmid, char __user *shmaddr,
{
return -ENOSYS;
}
-static inline bool is_file_shm_hugepages(struct file *file)
-{
- return false;
-}
static inline void exit_shm(struct task_struct *task)
{
}
diff --git a/ipc/shm.c b/ipc/shm.c
index 0ba6add05b35..8f119b1d6170 100644
--- a/ipc/shm.c
+++ b/ipc/shm.c
@@ -285,7 +285,7 @@ static void shm_destroy(struct ipc_namespace *ns, struct shmid_kernel *shp)
ns->shm_tot -= (shp->shm_segsz + PAGE_SIZE - 1) >> PAGE_SHIFT;
shm_rmid(ns, shp);
shm_unlock(shp);
- if (!is_file_hugepages(shm_file))
+ if (!is_file_hugetlbfs(shm_file))
shmem_lock(shm_file, 0, shp->mlock_user);
else if (shp->mlock_user)
user_shm_unlock(i_size_read(file_inode(shm_file)),
@@ -560,24 +560,6 @@ static const struct file_operations shm_file_operations = {
.fallocate = shm_fallocate,
};

-/*
- * shm_file_operations_huge is now identical to shm_file_operations,
- * but we keep it distinct for the sake of is_file_shm_hugepages().
- */
-static const struct file_operations shm_file_operations_huge = {
- .mmap = shm_mmap,
- .fsync = shm_fsync,
- .release = shm_release,
- .get_unmapped_area = shm_get_unmapped_area,
- .llseek = noop_llseek,
- .fallocate = shm_fallocate,
-};
-
-bool is_file_shm_hugepages(struct file *file)
-{
- return file->f_op == &shm_file_operations_huge;
-}
-
static const struct vm_operations_struct shm_vm_ops = {
.open = shm_open, /* callback for a new vm-area open */
.close = shm_close, /* callback for when the vm-area is released */
@@ -698,7 +680,7 @@ static int newseg(struct ipc_namespace *ns, struct ipc_params *params)
no_id:
ipc_update_pid(&shp->shm_cprid, NULL);
ipc_update_pid(&shp->shm_lprid, NULL);
- if (is_file_hugepages(file) && shp->mlock_user)
+ if (is_file_hugetlbfs(file) && shp->mlock_user)
user_shm_unlock(size, shp->mlock_user);
fput(file);
ipc_rcu_putref(&shp->shm_perm, shm_rcu_free);
@@ -836,7 +818,7 @@ static void shm_add_rss_swap(struct shmid_kernel *shp,

inode = file_inode(shp->shm_file);

- if (is_file_hugepages(shp->shm_file)) {
+ if (is_file_hugetlbfs(shp->shm_file)) {
struct address_space *mapping = inode->i_mapping;
struct hstate *h = hstate_file(shp->shm_file);
*rss_add += pages_per_huge_page(h) * mapping->nrpages;
@@ -1102,7 +1084,7 @@ static int shmctl_do_lock(struct ipc_namespace *ns, int shmid, int cmd)
}

shm_file = shp->shm_file;
- if (is_file_hugepages(shm_file))
+ if (is_file_hugetlbfs(shm_file))
goto out_unlock0;

if (cmd == SHM_LOCK) {
@@ -1523,10 +1505,7 @@ long do_shmat(int shmid, char __user *shmaddr, int shmflg,
goto out_nattch;
}

- file = alloc_file_clone(base, f_flags,
- is_file_hugepages(base) ?
- &shm_file_operations_huge :
- &shm_file_operations);
+ file = alloc_file_clone(base, f_flags, &shm_file_operations);
err = PTR_ERR(file);
if (IS_ERR(file)) {
kfree(sfd);
@@ -1534,6 +1513,9 @@ long do_shmat(int shmid, char __user *shmaddr, int shmflg,
goto out_nattch;
}

+ /* copy hugetlbfs mode for is_file_hugetlbfs() */
+ file->f_mode |= (base->f_mode & FMODE_HUGETLBFS);
+
sfd->id = shp->shm_perm.id;
sfd->ns = get_ipc_ns(ns);
sfd->file = base;
diff --git a/mm/memfd.c b/mm/memfd.c
index 2647c898990c..e6c16b6bf3f6 100644
--- a/mm/memfd.c
+++ b/mm/memfd.c
@@ -123,7 +123,7 @@ static unsigned int *memfd_file_seals_ptr(struct file *file)
return &SHMEM_I(file_inode(file))->seals;

#ifdef CONFIG_HUGETLBFS
- if (is_file_hugepages(file))
+ if (is_file_hugetlbfs(file))
return &HUGETLBFS_I(file_inode(file))->seals;
#endif

diff --git a/mm/mmap.c b/mm/mmap.c
index f609e9ec4a25..703a9680a937 100644
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -1538,7 +1538,7 @@ unsigned long do_mmap(struct file *file, unsigned long addr,
vm_flags |= VM_NORESERVE;

/* hugetlb applies strict overcommit unless MAP_NORESERVE */
- if (file && is_file_hugepages(file))
+ if (file && is_file_hugetlbfs(file))
vm_flags |= VM_NORESERVE;
}

@@ -1562,10 +1562,10 @@ unsigned long ksys_mmap_pgoff(unsigned long addr, unsigned long len,
file = fget(fd);
if (!file)
return -EBADF;
- if (is_file_hugepages(file))
+ if (is_file_hugetlbfs(file))
len = ALIGN(len, huge_page_size(hstate_file(file)));
retval = -EINVAL;
- if (unlikely(flags & MAP_HUGETLB && !is_file_hugepages(file)))
+ if (unlikely(flags & MAP_HUGETLB && !is_file_hugetlbfs(file)))
goto out_fput;
} else if (flags & MAP_HUGETLB) {
struct user_struct *user = NULL;
@@ -1678,7 +1678,7 @@ static inline int accountable_mapping(struct file *file, vm_flags_t vm_flags)
* hugetlb has its own accounting separate from the core VM
* VM_HUGETLB may not be set yet so we cannot check for that flag.
*/
- if (file && is_file_hugepages(file))
+ if (file && is_file_hugetlbfs(file))
return 0;

return (vm_flags & (VM_NORESERVE | VM_SHARED | VM_WRITE)) == VM_WRITE;
--
2.25.4