[PATCH 7/7] rust: file: add abstraction for `poll_table`

From: Alice Ryhl
Date: Wed Nov 29 2023 - 08:13:01 EST


The existing `CondVar` abstraction is a wrapper around `wait_list`, but
it does not support all use-cases of the C `wait_list` type. To be
specific, a `CondVar` cannot be registered with a `struct poll_table`.
This limitation has the advantage that you do not need to call
`synchronize_rcu` when destroying a `CondVar`.

However, we need the ability to register a `poll_table` with a
`wait_list` in Rust Binder. To enable this, introduce a type called
`PollCondVar`, which is like `CondVar` except that you can register a
`poll_table`. We also introduce `PollTable`, which is a safe wrapper
around `poll_table` that is intended to be used with `PollCondVar`.

The destructor of `PollCondVar` unconditionally calls `synchronize_rcu`
to ensure that the removal of epoll waiters has fully completed before
the `wait_list` is destroyed.

Signed-off-by: Alice Ryhl <aliceryhl@xxxxxxxxxx>
---
That said, `synchronize_rcu` is rather expensive and is not needed in
all cases: If we have never registered a `poll_table` with the
`wait_list`, then we don't need to call `synchronize_rcu`. (And this is
a common case in Binder - not all processes use Binder with epoll.) The
current implementation does not account for this, but we could change it
to store a boolean next to the `wait_list` to keep track of whether a
`poll_table` has ever been registered. It is up to discussion whether
this is desireable.

It is not clear to me whether we can implement the above without storing
an extra boolean. We could check whether the `wait_list` is empty, but
it is not clear that this is sufficient. Perhaps someone knows the
answer? If a `poll_table` has previously been registered with a
`wait_list`, is it the case that we can kfree the `wait_list` after
observing that the `wait_list` is empty without waiting for an rcu grace
period?

rust/bindings/bindings_helper.h | 2 +
rust/bindings/lib.rs | 1 +
rust/kernel/file.rs | 3 ++
rust/kernel/file/poll_table.rs | 97 +++++++++++++++++++++++++++++++++++++++++
rust/kernel/sync/condvar.rs | 2 +-
5 files changed, 104 insertions(+), 1 deletion(-)

diff --git a/rust/bindings/bindings_helper.h b/rust/bindings/bindings_helper.h
index c8daee341df6..14f84aeef62d 100644
--- a/rust/bindings/bindings_helper.h
+++ b/rust/bindings/bindings_helper.h
@@ -13,6 +13,7 @@
#include <linux/file.h>
#include <linux/fs.h>
#include <linux/pid_namespace.h>
+#include <linux/poll.h>
#include <linux/security.h>
#include <linux/slab.h>
#include <linux/refcount.h>
@@ -25,3 +26,4 @@
const size_t BINDINGS_ARCH_SLAB_MINALIGN = ARCH_SLAB_MINALIGN;
const gfp_t BINDINGS_GFP_KERNEL = GFP_KERNEL;
const gfp_t BINDINGS___GFP_ZERO = __GFP_ZERO;
+const __poll_t BINDINGS_POLLFREE = POLLFREE;
diff --git a/rust/bindings/lib.rs b/rust/bindings/lib.rs
index 9bcbea04dac3..eeb291cc60db 100644
--- a/rust/bindings/lib.rs
+++ b/rust/bindings/lib.rs
@@ -51,3 +51,4 @@ mod bindings_helper {

pub const GFP_KERNEL: gfp_t = BINDINGS_GFP_KERNEL;
pub const __GFP_ZERO: gfp_t = BINDINGS___GFP_ZERO;
+pub const POLLFREE: __poll_t = BINDINGS_POLLFREE;
diff --git a/rust/kernel/file.rs b/rust/kernel/file.rs
index 578ee307093f..35576678c993 100644
--- a/rust/kernel/file.rs
+++ b/rust/kernel/file.rs
@@ -14,6 +14,9 @@
use alloc::boxed::Box;
use core::{alloc::AllocError, marker::PhantomData, mem, ptr};

+mod poll_table;
+pub use self::poll_table::{PollCondVar, PollTable};
+
/// Flags associated with a [`File`].
pub mod flags {
/// File is opened in append mode.
diff --git a/rust/kernel/file/poll_table.rs b/rust/kernel/file/poll_table.rs
new file mode 100644
index 000000000000..a26b64df0106
--- /dev/null
+++ b/rust/kernel/file/poll_table.rs
@@ -0,0 +1,97 @@
+// SPDX-License-Identifier: GPL-2.0
+
+//! Utilities for working with `struct poll_table`.
+
+use crate::{
+ bindings,
+ file::File,
+ prelude::*,
+ sync::{CondVar, LockClassKey},
+ types::Opaque,
+};
+use core::ops::Deref;
+
+/// Creates a [`PollCondVar`] initialiser with the given name and a newly-created lock class.
+#[macro_export]
+macro_rules! new_poll_condvar {
+ ($($name:literal)?) => {
+ $crate::file::PollCondVar::new($crate::optional_name!($($name)?), $crate::static_lock_class!())
+ };
+}
+
+/// Wraps the kernel's `struct poll_table`.
+#[repr(transparent)]
+pub struct PollTable(Opaque<bindings::poll_table>);
+
+impl PollTable {
+ /// Creates a reference to a [`PollTable`] from a valid pointer.
+ ///
+ /// # Safety
+ ///
+ /// The caller must ensure that for the duration of 'a, the pointer will point at a valid poll
+ /// table, and that it is only accessed via the returned reference.
+ pub unsafe fn from_ptr<'a>(ptr: *mut bindings::poll_table) -> &'a mut PollTable {
+ // SAFETY: The safety requirements guarantee the validity of the dereference, while the
+ // `PollTable` type being transparent makes the cast ok.
+ unsafe { &mut *ptr.cast() }
+ }
+
+ fn get_qproc(&self) -> bindings::poll_queue_proc {
+ let ptr = self.0.get();
+ // SAFETY: The `ptr` is valid because it originates from a reference, and the `_qproc`
+ // field is not modified concurrently with this call.
+ unsafe { (*ptr)._qproc }
+ }
+
+ /// Register this [`PollTable`] with the provided [`PollCondVar`], so that it can be notified
+ /// using the condition variable.
+ pub fn register_wait(&mut self, file: &File, cv: &PollCondVar) {
+ if let Some(qproc) = self.get_qproc() {
+ // SAFETY: The pointers to `self` and `file` are valid because they are references.
+ //
+ // Before the wait list is destroyed, the destructor of `PollCondVar` will clear
+ // everything in the wait list, so the wait list is not used after it is freed.
+ unsafe { qproc(file.0.get() as _, cv.wait_list.get(), self.0.get()) };
+ }
+ }
+}
+
+/// A wrapper around [`CondVar`] that makes it usable with [`PollTable`].
+///
+/// [`CondVar`]: crate::sync::CondVar
+#[pin_data(PinnedDrop)]
+pub struct PollCondVar {
+ #[pin]
+ inner: CondVar,
+}
+
+impl PollCondVar {
+ /// Constructs a new condvar initialiser.
+ #[allow(clippy::new_ret_no_self)]
+ pub fn new(name: &'static CStr, key: &'static LockClassKey) -> impl PinInit<Self> {
+ pin_init!(Self {
+ inner <- CondVar::new(name, key),
+ })
+ }
+}
+
+// Make the `CondVar` methods callable on `PollCondVar`.
+impl Deref for PollCondVar {
+ type Target = CondVar;
+
+ fn deref(&self) -> &CondVar {
+ &self.inner
+ }
+}
+
+#[pinned_drop]
+impl PinnedDrop for PollCondVar {
+ fn drop(self: Pin<&mut Self>) {
+ // Clear anything registered using `register_wait`.
+ self.inner.notify(1, bindings::POLLHUP | bindings::POLLFREE);
+ // Wait for epoll items to be properly removed.
+ //
+ // SAFETY: Just an FFI call.
+ unsafe { bindings::synchronize_rcu() };
+ }
+}
diff --git a/rust/kernel/sync/condvar.rs b/rust/kernel/sync/condvar.rs
index b679b6f6dbeb..2d276a013ec8 100644
--- a/rust/kernel/sync/condvar.rs
+++ b/rust/kernel/sync/condvar.rs
@@ -143,7 +143,7 @@ pub fn wait_uninterruptible<T: ?Sized, B: Backend>(&self, guard: &mut Guard<'_,
}

/// Calls the kernel function to notify the appropriate number of threads with the given flags.
- fn notify(&self, count: i32, flags: u32) {
+ pub(crate) fn notify(&self, count: i32, flags: u32) {
// SAFETY: `wait_list` points to valid memory.
unsafe {
bindings::__wake_up(

--
2.43.0.rc1.413.gea7ed67945-goog