Re: [RFC PATCH v2 04/10] rust: file: Add a new RegularFile newtype useful for reading files

From: Ariel Miculas
Date: Thu Jul 27 2023 - 09:18:21 EST


On Thu, Jul 27, 2023 at 2:52 AM Trevor Gross <tmgross@xxxxxxxxx> wrote:
>
> On Wed, Jul 26, 2023 at 12:54 PM Ariel Miculas <amiculas@xxxxxxxxx> wrote:
> >
> > Implement from_path, from_path_in_root_mnt, read_with_offset,
> > read_to_end and get_file_size methods for a RegularFile newtype.
> >
> > Signed-off-by: Ariel Miculas <amiculas@xxxxxxxxx>
> > ---
> > rust/helpers.c | 6 ++
> > rust/kernel/error.rs | 4 +-
> > rust/kernel/file.rs | 129 ++++++++++++++++++++++++++++++++++++++++++-
> > 3 files changed, 135 insertions(+), 4 deletions(-)
> >
> > diff --git a/rust/helpers.c b/rust/helpers.c
> > index eed8ace52fb5..9e860a554cda 100644
> > --- a/rust/helpers.c
> > +++ b/rust/helpers.c
> > @@ -213,6 +213,12 @@ void *rust_helper_alloc_inode_sb(struct super_block *sb,
> > }
> > EXPORT_SYMBOL_GPL(rust_helper_alloc_inode_sb);
> >
> > +loff_t rust_helper_i_size_read(const struct inode *inode)
> > +{
> > + return i_size_read(inode);
> > +}
> > +EXPORT_SYMBOL_GPL(rust_helper_i_size_read);
> > +
> > /*
> > * We use `bindgen`'s `--size_t-is-usize` option to bind the C `size_t` type
> > * as the Rust `usize` type, so we can use it in contexts where Rust
> > diff --git a/rust/kernel/error.rs b/rust/kernel/error.rs
> > index 05fcab6abfe6..e061c83f806a 100644
> > --- a/rust/kernel/error.rs
> > +++ b/rust/kernel/error.rs
> > @@ -273,9 +273,7 @@ pub fn to_result(err: core::ffi::c_int) -> Result {
> > /// }
> > /// }
> > /// ```
> > -// TODO: Remove `dead_code` marker once an in-kernel client is available.
> > -#[allow(dead_code)]
> > -pub(crate) fn from_err_ptr<T>(ptr: *mut T) -> Result<*mut T> {
> > +pub fn from_err_ptr<T>(ptr: *mut T) -> Result<*mut T> {
> > // CAST: Casting a pointer to `*const core::ffi::c_void` is always valid.
> > let const_ptr: *const core::ffi::c_void = ptr.cast();
> > // SAFETY: The FFI function does not deref the pointer.
> > diff --git a/rust/kernel/file.rs b/rust/kernel/file.rs
> > index 494939ba74df..a3002c416dbb 100644
> > --- a/rust/kernel/file.rs
> > +++ b/rust/kernel/file.rs
> > @@ -8,11 +8,13 @@
> > use crate::{
> > bindings,
> > cred::Credential,
> > - error::{code::*, from_result, Error, Result},
> > + error::{code::*, from_err_ptr, from_result, Error, Result},
> > fs,
> > io_buffer::{IoBufferReader, IoBufferWriter},
> > iov_iter::IovIter,
> > mm,
> > + mount::Vfsmount,
> > + str::CStr,
> > sync::CondVar,
> > types::ARef,
> > types::AlwaysRefCounted,
> > @@ -20,6 +22,7 @@
> > types::Opaque,
> > user_ptr::{UserSlicePtr, UserSlicePtrReader, UserSlicePtrWriter},
> > };
> > +use alloc::vec::Vec;
> > use core::convert::{TryFrom, TryInto};
> > use core::{marker, mem, ptr};
> > use macros::vtable;
> > @@ -201,6 +204,130 @@ unsafe fn dec_ref(obj: ptr::NonNull<Self>) {
> > }
> > }
> >
> > +/// A newtype over file, specific to regular files
> > +pub struct RegularFile(ARef<File>);
> > +impl RegularFile {
> > + /// Creates a new instance of Self if the file is a regular file
> > + ///
> > + /// # Safety
> > + ///
> > + /// The caller must ensure file_ptr.f_inode is initialized to a valid pointer (e.g. file_ptr is
> > + /// a pointer returned by path_openat); It must also ensure that file_ptr's reference count was
> > + /// incremented at least once
> > + fn create_if_regular(file_ptr: ptr::NonNull<bindings::file>) -> Result<RegularFile> {
>
> This function should be unsafe, correct? You instead take a
> `&bindings::file` instead of `NonNull` but still keep it unsafe, so
> the "valid pointer" invariant is always reached.
>
> Or, could this take `&kernel::file::File` instead to reduce some duplication?
>
Yes, this should have been unsafe, I've also changed it to receive a
`*mut bindings::file` instead of NonNull.
> > + // SAFETY: file_ptr is a NonNull pointer
> > + let inode = unsafe { core::ptr::addr_of!((*file_ptr.as_ptr()).f_inode).read() };
> > + // SAFETY: the caller must ensure f_inode is initialized to a valid pointer
> > + unsafe {
> > + if bindings::S_IFMT & ((*inode).i_mode) as u32 != bindings::S_IFREG {
> > + return Err(EINVAL);
> > + }
> > + }
>
> Nit: factor `unsafe { ((*inode).i_mode) }` out so it doesn't look like
> the whole statement is unsafe
>
> > + // SAFETY: the safety requirements state that file_ptr's reference count was incremented at
> > + // least once
> > + Ok(RegularFile(unsafe { ARef::from_raw(file_ptr.cast()) }))
> > + }
> > + /// Constructs a new [`struct file`] wrapper from a path.
> > + pub fn from_path(filename: &CStr, flags: i32, mode: u16) -> Result<Self> {
> > + let file_ptr = unsafe {
> > + // SAFETY: filename is a reference, so it's a valid pointer
> > + from_err_ptr(bindings::filp_open(
> > + filename.as_ptr() as *const i8,
> > + flags,
> > + mode,
> > + ))?
> > + };
>
> I mentioned in another email that `.cast::<i8>()` can be more
> idiomatic and a bit safer than `as`, it's a stylistic choice but there
> are a few places it could be changed here if desired
Thanks, didn't know about this.
>
> Also, the `// SAFETY` comments need to go outside the unsafe block
>
> > + let file_ptr = ptr::NonNull::new(file_ptr).ok_or(ENOENT)?;
> > +
> > + // SAFETY: `filp_open` initializes the refcount with 1
> > + Self::create_if_regular(file_ptr)
> > + }
>
> Will need unsafe block if `create_if_regular` becomes unsafe
>
> > +
> > + /// Constructs a new [`struct file`] wrapper from a path and a vfsmount.
> > + pub fn from_path_in_root_mnt(
> > + mount: &Vfsmount,
> > + filename: &CStr,
> > + flags: i32,
> > + mode: u16,
> > + ) -> Result<Self> {
> > + let file_ptr = {
> > + let mnt = mount.get();
> > + // construct a path from vfsmount, see file_open_root_mnt
> > + let raw_path = bindings::path {
> > + mnt,
> > + // SAFETY: Vfsmount structure stores a valid vfsmount object
> > + dentry: unsafe { (*mnt).mnt_root },
> > + };
> > + unsafe {
> > + // SAFETY: raw_path and filename are both references
> > + from_err_ptr(bindings::file_open_root(
> > + &raw_path,
> > + filename.as_ptr() as *const i8,
> > + flags,
> > + mode,
> > + ))?
> > + }
> > + };
>
> Is there a reason to use the larger scope block, rather than moving
> `mnt` and `raw_path` out and doing `let file_ptr = unsafe { ... }`? If
> so, a comment would be good.
No, that's just how the code has evolved from splitting unsafe blocks,
I've changed this in
https://github.com/ariel-miculas/linux/tree/puzzlefs_rfc
>
> > + let file_ptr = ptr::NonNull::new(file_ptr).ok_or(ENOENT)?;
> > +
> > + // SAFETY: `file_open_root` initializes the refcount with 1
> > + Self::create_if_regular(file_ptr)
> > + }
> > +
> > + /// Read from the file into the specified buffer
> > + pub fn read_with_offset(&self, buf: &mut [u8], offset: u64) -> Result<usize> {
> > + Ok({
> > + // kernel_read_file expects a pointer to a "void *" buffer
> > + let mut ptr_to_buf = buf.as_mut_ptr() as *mut core::ffi::c_void;
> > + // Unless we give a non-null pointer to the file size:
> > + // 1. we cannot give a non-zero value for the offset
> > + // 2. we cannot have offset 0 and buffer_size > file_size
> > + let mut file_size = 0;
> > +
> > + // SAFETY: 'file' is valid because it's taken from Self, 'buf' and 'file_size` are
> > + // references to the stack variables 'ptr_to_buf' and 'file_size'; ptr_to_buf is also
> > + // a pointer to a valid buffer that was obtained from a reference
> > + let result = unsafe {
> > + bindings::kernel_read_file(
> > + self.0 .0.get(),
>
> Is this spacing intentional? If so, `(self.0).0.get()` may be cleaner
No, it's not intentional, this is rustfmt`s creation.
>
> > + offset.try_into()?,
> > + &mut ptr_to_buf,
> > + buf.len(),
> > + &mut file_size,
> > + bindings::kernel_read_file_id_READING_UNKNOWN,
> > + )
> > + };
> > +
> > + // kernel_read_file returns the number of bytes read on success or negative on error.
> > + if result < 0 {
> > + return Err(Error::from_errno(result.try_into()?));
> > + }
> > +
> > + result.try_into()?
> > + })
> > + }
>
> I think you could remove the block here and just return `Ok(result.try_into()?)`
Good idea.
>
> > +
> > + /// Allocate and return a vector containing the contents of the entire file
> > + pub fn read_to_end(&self) -> Result<Vec<u8>> {
> > + let file_size = self.get_file_size()?;
> > + let mut buffer = Vec::try_with_capacity(file_size)?;
> > + buffer.try_resize(file_size, 0)?;
> > + self.read_with_offset(&mut buffer, 0)?;
> > + Ok(buffer)
> > + }
> > +
> > + fn get_file_size(&self) -> Result<usize> {
> > + // SAFETY: 'file' is valid because it's taken from Self
> > + let file_size = unsafe { bindings::i_size_read((*self.0 .0.get()).f_inode) };
> > +
> > + if file_size < 0 {
> > + return Err(EINVAL);
> > + }
> > +
> > + Ok(file_size.try_into()?)
> > + }
> > +}
> > +
> > /// A file descriptor reservation.
> > ///
> > /// This allows the creation of a file descriptor in two steps: first, we reserve a slot for it,
> > --
> > 2.41.0
> >
> >
>