Re: [RFC PATCH v2 04/10] rust: file: Add a new RegularFile newtype useful for reading files

From: Trevor Gross
Date: Wed Jul 26 2023 - 19:52:35 EST


On Wed, Jul 26, 2023 at 12:54 PM Ariel Miculas <amiculas@xxxxxxxxx> wrote:
>
> Implement from_path, from_path_in_root_mnt, read_with_offset,
> read_to_end and get_file_size methods for a RegularFile newtype.
>
> Signed-off-by: Ariel Miculas <amiculas@xxxxxxxxx>
> ---
> rust/helpers.c | 6 ++
> rust/kernel/error.rs | 4 +-
> rust/kernel/file.rs | 129 ++++++++++++++++++++++++++++++++++++++++++-
> 3 files changed, 135 insertions(+), 4 deletions(-)
>
> diff --git a/rust/helpers.c b/rust/helpers.c
> index eed8ace52fb5..9e860a554cda 100644
> --- a/rust/helpers.c
> +++ b/rust/helpers.c
> @@ -213,6 +213,12 @@ void *rust_helper_alloc_inode_sb(struct super_block *sb,
> }
> EXPORT_SYMBOL_GPL(rust_helper_alloc_inode_sb);
>
> +loff_t rust_helper_i_size_read(const struct inode *inode)
> +{
> + return i_size_read(inode);
> +}
> +EXPORT_SYMBOL_GPL(rust_helper_i_size_read);
> +
> /*
> * We use `bindgen`'s `--size_t-is-usize` option to bind the C `size_t` type
> * as the Rust `usize` type, so we can use it in contexts where Rust
> diff --git a/rust/kernel/error.rs b/rust/kernel/error.rs
> index 05fcab6abfe6..e061c83f806a 100644
> --- a/rust/kernel/error.rs
> +++ b/rust/kernel/error.rs
> @@ -273,9 +273,7 @@ pub fn to_result(err: core::ffi::c_int) -> Result {
> /// }
> /// }
> /// ```
> -// TODO: Remove `dead_code` marker once an in-kernel client is available.
> -#[allow(dead_code)]
> -pub(crate) fn from_err_ptr<T>(ptr: *mut T) -> Result<*mut T> {
> +pub fn from_err_ptr<T>(ptr: *mut T) -> Result<*mut T> {
> // CAST: Casting a pointer to `*const core::ffi::c_void` is always valid.
> let const_ptr: *const core::ffi::c_void = ptr.cast();
> // SAFETY: The FFI function does not deref the pointer.
> diff --git a/rust/kernel/file.rs b/rust/kernel/file.rs
> index 494939ba74df..a3002c416dbb 100644
> --- a/rust/kernel/file.rs
> +++ b/rust/kernel/file.rs
> @@ -8,11 +8,13 @@
> use crate::{
> bindings,
> cred::Credential,
> - error::{code::*, from_result, Error, Result},
> + error::{code::*, from_err_ptr, from_result, Error, Result},
> fs,
> io_buffer::{IoBufferReader, IoBufferWriter},
> iov_iter::IovIter,
> mm,
> + mount::Vfsmount,
> + str::CStr,
> sync::CondVar,
> types::ARef,
> types::AlwaysRefCounted,
> @@ -20,6 +22,7 @@
> types::Opaque,
> user_ptr::{UserSlicePtr, UserSlicePtrReader, UserSlicePtrWriter},
> };
> +use alloc::vec::Vec;
> use core::convert::{TryFrom, TryInto};
> use core::{marker, mem, ptr};
> use macros::vtable;
> @@ -201,6 +204,130 @@ unsafe fn dec_ref(obj: ptr::NonNull<Self>) {
> }
> }
>
> +/// A newtype over file, specific to regular files
> +pub struct RegularFile(ARef<File>);
> +impl RegularFile {
> + /// Creates a new instance of Self if the file is a regular file
> + ///
> + /// # Safety
> + ///
> + /// The caller must ensure file_ptr.f_inode is initialized to a valid pointer (e.g. file_ptr is
> + /// a pointer returned by path_openat); It must also ensure that file_ptr's reference count was
> + /// incremented at least once
> + fn create_if_regular(file_ptr: ptr::NonNull<bindings::file>) -> Result<RegularFile> {

This function should be unsafe, correct? You instead take a
`&bindings::file` instead of `NonNull` but still keep it unsafe, so
the "valid pointer" invariant is always reached.

Or, could this take `&kernel::file::File` instead to reduce some duplication?

> + // SAFETY: file_ptr is a NonNull pointer
> + let inode = unsafe { core::ptr::addr_of!((*file_ptr.as_ptr()).f_inode).read() };
> + // SAFETY: the caller must ensure f_inode is initialized to a valid pointer
> + unsafe {
> + if bindings::S_IFMT & ((*inode).i_mode) as u32 != bindings::S_IFREG {
> + return Err(EINVAL);
> + }
> + }

Nit: factor `unsafe { ((*inode).i_mode) }` out so it doesn't look like
the whole statement is unsafe

> + // SAFETY: the safety requirements state that file_ptr's reference count was incremented at
> + // least once
> + Ok(RegularFile(unsafe { ARef::from_raw(file_ptr.cast()) }))
> + }
> + /// Constructs a new [`struct file`] wrapper from a path.
> + pub fn from_path(filename: &CStr, flags: i32, mode: u16) -> Result<Self> {
> + let file_ptr = unsafe {
> + // SAFETY: filename is a reference, so it's a valid pointer
> + from_err_ptr(bindings::filp_open(
> + filename.as_ptr() as *const i8,
> + flags,
> + mode,
> + ))?
> + };

I mentioned in another email that `.cast::<i8>()` can be more
idiomatic and a bit safer than `as`, it's a stylistic choice but there
are a few places it could be changed here if desired

Also, the `// SAFETY` comments need to go outside the unsafe block

> + let file_ptr = ptr::NonNull::new(file_ptr).ok_or(ENOENT)?;
> +
> + // SAFETY: `filp_open` initializes the refcount with 1
> + Self::create_if_regular(file_ptr)
> + }

Will need unsafe block if `create_if_regular` becomes unsafe

> +
> + /// Constructs a new [`struct file`] wrapper from a path and a vfsmount.
> + pub fn from_path_in_root_mnt(
> + mount: &Vfsmount,
> + filename: &CStr,
> + flags: i32,
> + mode: u16,
> + ) -> Result<Self> {
> + let file_ptr = {
> + let mnt = mount.get();
> + // construct a path from vfsmount, see file_open_root_mnt
> + let raw_path = bindings::path {
> + mnt,
> + // SAFETY: Vfsmount structure stores a valid vfsmount object
> + dentry: unsafe { (*mnt).mnt_root },
> + };
> + unsafe {
> + // SAFETY: raw_path and filename are both references
> + from_err_ptr(bindings::file_open_root(
> + &raw_path,
> + filename.as_ptr() as *const i8,
> + flags,
> + mode,
> + ))?
> + }
> + };

Is there a reason to use the larger scope block, rather than moving
`mnt` and `raw_path` out and doing `let file_ptr = unsafe { ... }`? If
so, a comment would be good.

> + let file_ptr = ptr::NonNull::new(file_ptr).ok_or(ENOENT)?;
> +
> + // SAFETY: `file_open_root` initializes the refcount with 1
> + Self::create_if_regular(file_ptr)
> + }
> +
> + /// Read from the file into the specified buffer
> + pub fn read_with_offset(&self, buf: &mut [u8], offset: u64) -> Result<usize> {
> + Ok({
> + // kernel_read_file expects a pointer to a "void *" buffer
> + let mut ptr_to_buf = buf.as_mut_ptr() as *mut core::ffi::c_void;
> + // Unless we give a non-null pointer to the file size:
> + // 1. we cannot give a non-zero value for the offset
> + // 2. we cannot have offset 0 and buffer_size > file_size
> + let mut file_size = 0;
> +
> + // SAFETY: 'file' is valid because it's taken from Self, 'buf' and 'file_size` are
> + // references to the stack variables 'ptr_to_buf' and 'file_size'; ptr_to_buf is also
> + // a pointer to a valid buffer that was obtained from a reference
> + let result = unsafe {
> + bindings::kernel_read_file(
> + self.0 .0.get(),

Is this spacing intentional? If so, `(self.0).0.get()` may be cleaner

> + offset.try_into()?,
> + &mut ptr_to_buf,
> + buf.len(),
> + &mut file_size,
> + bindings::kernel_read_file_id_READING_UNKNOWN,
> + )
> + };
> +
> + // kernel_read_file returns the number of bytes read on success or negative on error.
> + if result < 0 {
> + return Err(Error::from_errno(result.try_into()?));
> + }
> +
> + result.try_into()?
> + })
> + }

I think you could remove the block here and just return `Ok(result.try_into()?)`

> +
> + /// Allocate and return a vector containing the contents of the entire file
> + pub fn read_to_end(&self) -> Result<Vec<u8>> {
> + let file_size = self.get_file_size()?;
> + let mut buffer = Vec::try_with_capacity(file_size)?;
> + buffer.try_resize(file_size, 0)?;
> + self.read_with_offset(&mut buffer, 0)?;
> + Ok(buffer)
> + }
> +
> + fn get_file_size(&self) -> Result<usize> {
> + // SAFETY: 'file' is valid because it's taken from Self
> + let file_size = unsafe { bindings::i_size_read((*self.0 .0.get()).f_inode) };
> +
> + if file_size < 0 {
> + return Err(EINVAL);
> + }
> +
> + Ok(file_size.try_into()?)
> + }
> +}
> +
> /// A file descriptor reservation.
> ///
> /// This allows the creation of a file descriptor in two steps: first, we reserve a slot for it,
> --
> 2.41.0
>
>