Re: [PATCH] kernfs: implement custom llseek method to fix userspace regression

From: Dan Williams
Date: Tue Aug 15 2023 - 11:49:36 EST


Valentin Sinitsyn wrote:
[..]
> > My only concern is whether there are any scenarios where this is not
> > appropriate. I.e. do a bit more work to define a kernfs_ops instance
> > specifically for overriding lseek() in this scenario.
>
> Not sure I'm getting you here: do you mean something like this?
>
> struct inode *inode = is_f_mapping_redefined(file) ? file_inode(file) :
> file->f_mapping->host;

I meant something like the patch below (incomplete, but shows the idea).

> My understanding is file->f_inode should always be non-NULL and point to
> the inode corresponding the path of the opened file, so it should be
> safe to call regardless what f_mapping->host is. Do I miss anything?

That matches my understanding and I do not think you missed anything. At
the same time a comment about "PCI resources" is out of place in
fs/kernfs/file.c.

On the rare chance that someone down the line cares about the
difference, a more localized change like this lets this override be done
in generic terms (f_mapping override) without reference to PCI resource
specifics:

diff --git a/fs/sysfs/file.c b/fs/sysfs/file.c
index a12ac0356c69..748804cd889f 100644
--- a/fs/sysfs/file.c
+++ b/fs/sysfs/file.c
@@ -167,6 +167,23 @@ static int sysfs_kf_bin_mmap(struct kernfs_open_file *of,
return battr->mmap(of->file, kobj, battr, vma);
}

+/* bin-attribute llseek: bypass f_mapping->host when f_mapping is overridden */
+static loff_t sysfs_kf_bin_llseek(struct kernfs_open_file *of, loff_t offset,
+ int whence)
+{
+ struct bin_attribute *battr = of->kn->priv;
+
+ /* when mapping is overridden do not use it to lookup the inode */
+ if (battr->f_mapping) {
+ struct inode *inode = file_inode(of->file);
+
+ return generic_file_llseek_size(of->file, offset, whence,
+ inode->i_sb->s_maxbytes,
+ i_size_read(inode));
+ }
+ return generic_file_llseek(of->file, offset, whence);
+}
+
static int sysfs_kf_bin_open(struct kernfs_open_file *of)
{
struct bin_attribute *battr = of->kn->priv;
@@ -249,6 +266,7 @@ static const struct kernfs_ops sysfs_bin_kfops_mmap = {
.write = sysfs_kf_bin_write,
.mmap = sysfs_kf_bin_mmap,
.open = sysfs_kf_bin_open,
+ .llseek = sysfs_kf_bin_llseek,
};

int sysfs_add_file_mode_ns(struct kernfs_node *parent,
diff --git a/include/linux/kernfs.h b/include/linux/kernfs.h
index 73f5c120def8..9ed535930259 100644
--- a/include/linux/kernfs.h
+++ b/include/linux/kernfs.h
@@ -316,6 +316,7 @@ struct kernfs_ops {
struct poll_table_struct *pt);

int (*mmap)(struct kernfs_open_file *of, struct vm_area_struct *vma);
+ loff_t (*llseek)(struct kernfs_open_file *of, loff_t offset, int whence);
};

/*