Re: [PATCH v2 5/6] rust: rbtree: add `RBTreeCursor`

From: Benno Lossin
Date: Thu Mar 14 2024 - 13:13:53 EST


On 2/19/24 12:48, Matt Gilbride wrote:
> Add a cursor interface to `RBTree`, supporting the following use cases:
> - Inspect the current node pointed to by the cursor, inspect/move to
> its neighbors in sort order (bidirectionally).
> - Mutate the tree itself by removing the current node pointed to by the
> cursor, or one of its neighbors.
>
> Add functions to obtain a cursor to the tree by key:
> - The node with the smallest key
> - The node with the largest key
> - The node matching the given key, or the one with the next larger key
>
> The cursor abstraction is needed by the binder driver to efficiently
> search for nodes and (conditionally) modify them, as well as their
> neighbors [1].
>
> Link: https://lore.kernel.org/rust-for-linux/20231101-rust-binder-v1-6-08ba9197f637@xxxxxxxxxx/ [1]
> Co-developed-by: Alice Ryhl <aliceryhl@xxxxxxxxxx>
> Signed-off-by: Alice Ryhl <aliceryhl@xxxxxxxxxx>
> Reviewed-by: Alice Ryhl <aliceryhl@xxxxxxxxxx>
> Tested-by: Alice Ryhl <aliceryhl@xxxxxxxxxx>
> Signed-off-by: Matt Gilbride <mattgilbride@xxxxxxxxxx>
> ---
> rust/kernel/rbtree.rs | 512 ++++++++++++++++++++++++++++++++++++++++++++++++++
> 1 file changed, 512 insertions(+)
>
> diff --git a/rust/kernel/rbtree.rs b/rust/kernel/rbtree.rs
> index ccf74e0dc3ec..6c3b10e46517 100644
> --- a/rust/kernel/rbtree.rs
> +++ b/rust/kernel/rbtree.rs
> @@ -245,6 +245,36 @@ pub fn values(&self) -> impl Iterator<Item = &'_ V> {
> pub fn values_mut(&mut self) -> impl Iterator<Item = &'_ mut V> {
> self.iter_mut().map(|(_, v)| v)
> }
> +
> + /// Returns a cursor over the tree nodes, starting with the smallest key.
> + pub fn cursor_front(&mut self) -> Option<RBTreeCursor<'_, K, V>> {
> + let root = addr_of_mut!(self.root);
> + // SAFETY: `self.root` is always a valid root node
> + let current = unsafe { bindings::rb_first(root) };
> + if current.is_null() {
> + return None;
> + }
> + Some(RBTreeCursor {

Missing INVARIANT comment.

> + _tree: PhantomData,
> + root,
> + current,
> + })
> + }
> +
> + /// Returns a cursor over the tree nodes, starting with the largest key.
> + pub fn cursor_back(&mut self) -> Option<RBTreeCursor<'_, K, V>> {
> + let root = addr_of_mut!(self.root);
> + // SAFETY: `self.root` is always a valid root node
> + let current = unsafe { bindings::rb_last(root) };
> + if current.is_null() {
> + return None;
> + }
> + Some(RBTreeCursor {

Ditto: missing INVARIANT comment.

> + _tree: PhantomData,
> + root,
> + current,
> + })
> + }
> }
>
> impl<K, V> RBTree<K, V>
> @@ -377,6 +407,59 @@ pub fn remove(&mut self, key: &K) -> Option<V> {
> } = *node;
> Some(value)
> }
> +
> + /// Returns a cursor over the tree nodes based on the given key.
> + ///
> + /// If the given key exists, the cursor starts there.
> + /// Otherwise it starts with the first larger key in sort order.
> + /// If there is no larger key, it returns [`None`].
> + pub fn cursor_lower_bound(&mut self, key: &K) -> Option<RBTreeCursor<'_, K, V>>
> + where
> + K: Ord,
> + {
> + let mut node = self.root.rb_node;
> + let mut best_match: Option<NonNull<Node<K, V>>> = None;
> + while !node.is_null() {
> + // SAFETY: All links fields we create are in a `Node<K, V>`.
> + let this = unsafe { crate::container_of!(node, Node<K, V>, links) }.cast_mut();
> + // SAFETY: `this` is a non-null node so it is valid by the type invariants.
> + let this_key = unsafe { &(*this).key };
> + // SAFETY: `node` is a non-null node so it is valid by the type invariants.
> + let left_child = unsafe { (*node).rb_left };
> + // SAFETY: `node` is a non-null node so it is valid by the type invariants.
> + let right_child = unsafe { (*node).rb_right };
> + if key == this_key {
> + return Some(RBTreeCursor {

Ditto: missing INVARIANT comment.

> + _tree: PhantomData,
> + root: addr_of_mut!(self.root),
> + current: node,
> + });
> + } else {
> + node = if key > this_key {
> + right_child
> + } else {
> + let is_better_match = match best_match {
> + None => true,
> + Some(best) => {
> + // SAFETY: `best` is a non-null node so it is valid by the type invariants.
> + let best_key = unsafe { &(*best.as_ptr()).key };
> + best_key > this_key
> + }
> + };
> + if is_better_match {
> + best_match = NonNull::new(this);
> + }
> + left_child
> + }
> + };
> + }
> + best_match.map(|best| RBTreeCursor {

Ditto: missing INVARIANT comment.

> + _tree: PhantomData,
> + root: addr_of_mut!(self.root),
> + // SAFETY: `best` is a non-null node so it is valid by the type invariants.
> + current: unsafe { addr_of_mut!((*best.as_ptr()).links) },
> + })
> + }
> }
>
> impl<K, V> Default for RBTree<K, V> {
> @@ -407,6 +490,435 @@ fn drop(&mut self) {
> }
> }
>
> +/// A bidirectional cursor over the tree nodes, sorted by key.
> +///
> +/// # Invariants
> +///
> +/// An instance of `RBTreeCursor` is only acquired from [`RBTree`].
> +/// A reference to the tree used to create the cursor outlives the cursor, so
> +/// the tree cannot change. By the tree invariant, all nodes are valid.

Make the invariant mention the fields directly:
- `root` and `current` are valid pointers
- `root` points to the `root` node of an [`RBTree`]
- `current` points to a node that is in the same [`RBTree`] that `root` is pointing to

> +///
> +/// # Examples

[...]

> +pub struct RBTreeCursor<'a, K, V> {
> + _tree: PhantomData<&'a RBTree<K, V>>,
> + root: *mut bindings::rb_root,
> + current: *mut bindings::rb_node,
> +}
> +
> +// SAFETY: An [`RBTree`] allows the same kinds of access to its values that a struct allows to its
> +// fields, so we use the same Send condition as would be used for a struct with K and V fields.
> +unsafe impl<'a, K: Send, V: Send> Send for RBTreeCursor<'a, K, V> {}
> +
> +// SAFETY: An [`RBTree`] allows the same kinds of access to its values that a struct allows to its
> +// fields, so we use the same Sync condition as would be used for a struct with K and V fields.
> +unsafe impl<'a, K: Sync, V: Sync> Sync for RBTreeCursor<'a, K, V> {}
> +
> +impl<'a, K, V> RBTreeCursor<'a, K, V> {
> + /// The current node
> + pub fn current(&self) -> (&K, &V) {
> + Self::to_key_value(self.current)
> + }
> +
> + /// The current node, with a mutable value
> + pub fn current_mut(&mut self) -> (&K, &mut V) {
> + Self::to_key_value_mut(self.current)
> + }
> +
> + /// Remove the current node from the tree.
> + ///
> + /// Returns a cursor to the next node, if it exists,
> + /// else the previous node. Returns [`None`] if the tree
> + /// becomes empty.
> + pub fn remove_current(mut self) -> Option<Self> {
> + let prev = self.get_neighbor_raw(Direction::Prev);
> + let next = self.get_neighbor_raw(Direction::Next);
> + // SAFETY: All links fields we create are in a `Node<K, V>`.

This safety comment should be updated like the ones in the earlier
patches.

> + let this = unsafe { crate::container_of!(self.current, Node<K, V>, links) }.cast_mut();
> + // SAFETY: The reference to the tree used to create the cursor outlives the cursor, so
> + // the tree cannot change. By the tree invariant, all nodes are valid.
> + unsafe { bindings::rb_erase(&mut (*this).links, self.root) };
> +
> + let current = match (prev, next) {
> + (_, Some(next)) => next,
> + (Some(prev), None) => prev,
> + (None, None) => {
> + return None;
> + }
> + };
> +
> + Some(Self {

Missing INVARIANT comment.

> + current,
> + _tree: self._tree,
> + root: self.root,
> + })
> + }
> +
> + /// Remove the previous node, returning it if it exists.
> + pub fn remove_prev(&mut self) -> Option<(K, V)> {
> + self.remove_neighbor(Direction::Prev)
> + }
> +
> + /// Remove the next node, returning it if it exists.
> + pub fn remove_next(&mut self) -> Option<(K, V)> {
> + self.remove_neighbor(Direction::Next)
> + }
> +
> + fn remove_neighbor(&mut self, direction: Direction) -> Option<(K, V)> {
> + if let Some(neighbor) = self.get_neighbor_raw(direction) {
> + // SAFETY: All links fields we create are in a `Node<K, V>`.
> + let this = unsafe { crate::container_of!(neighbor, Node<K, V>, links) }.cast_mut();
> + // SAFETY: The reference to the tree used to create the cursor outlives the cursor, so
> + // the tree cannot change. By the tree invariant, all nodes are valid.
> + unsafe { bindings::rb_erase(&mut (*this).links, self.root) };
> + return Some(Self::to_key_value_owned(neighbor));
> + }
> + None
> + }
> +
> + /// Move the cursor to the previous node, returning [`None`] if it doesn't exist.
> + pub fn move_prev(self) -> Option<Self> {
> + self.mv(Direction::Prev)
> + }
> +
> + /// Move the cursor to the next node, returning [`None`] if it doesn't exist.
> + pub fn move_next(self) -> Option<Self> {
> + self.mv(Direction::Next)
> + }
> +
> + fn mv(mut self, direction: Direction) -> Option<Self> {
> + self.get_neighbor_raw(direction).map(|neighbor| Self {

Ditto: missing INVARIANT comment.

> + _tree: self._tree,
> + root: self.root,
> + current: neighbor,
> + })
> + }

[...]

> + fn peek_mut(&mut self, direction: Direction) -> Option<(&K, &mut V)> {
> + // SAFETY: `self.current` is valid by the type invariants.
> + let neighbor = unsafe {
> + match direction {
> + Direction::Prev => bindings::rb_prev(self.current),
> + Direction::Next => bindings::rb_next(self.current),
> + }
> + };
> +
> + if neighbor.is_null() {
> + return None;
> + }

Why not use `get_neighbor_raw` here? The code above duplicates its body.

> +
> + Some(Self::to_key_value_mut(neighbor))
> + }
> +
> + fn get_neighbor_raw(&mut self, direction: Direction) -> Option<*mut bindings::rb_node> {
> + // SAFETY: `self.current` is valid by the type invariants.
> + let neighbor = unsafe {
> + match direction {
> + Direction::Prev => bindings::rb_prev(self.current),
> + Direction::Next => bindings::rb_next(self.current),
> + }
> + };
> +
> + if neighbor.is_null() {
> + return None;
> + }
> +
> + Some(neighbor)
> + }
> +
> + // This internal method should *only* be called with a valid pointer to a node.
> + fn to_key_value(node: *mut bindings::rb_node) -> (&'a K, &'a V) {
> + // SAFETY: All links fields we create are in a `Node<K, V>`.
> + let this = unsafe { crate::container_of!(node, Node<K, V>, links) };
> + // SAFETY: The passed `node` is the current node or a non-null neighbor,
> + // thus `this` is valid by the type invariants.
> + let k = unsafe { &(*this).key };
> + // SAFETY: The passed `node` is the current node or a non-null neighbor,
> + // thus `this` is valid by the type invariants.
> + let v = unsafe { &(*this).value };
> + (k, v)
> + }
> +
> + // This internal method should *only* be called with a valid pointer to a node.
> + fn to_key_value_mut(node: *mut bindings::rb_node) -> (&'a K, &'a mut V) {
> + // SAFETY: All links fields we create are in a `Node<K, V>`.
> + let this = unsafe { crate::container_of!(node, Node<K, V>, links) }.cast_mut();
> + // SAFETY: The passed `node` is the current node or a non-null neighbor,
> + // thus `this` is valid by the type invariants.
> + let k = unsafe { &(*this).key };
> + // SAFETY: The passed `node` is the current node or a non-null neighbor,
> + // thus `this` is valid by the type invariants.
> + let v = unsafe { &mut (*this).value };
> + (k, v)
> + }
> +
> + // This internal method should *only* be called with a valid pointer to a node *that is being removed*.
> + fn to_key_value_owned(node: *mut bindings::rb_node) -> (K, V) {
> + // SAFETY: All links fields we create are in a `Node<K, V>`.
> + let this = unsafe { crate::container_of!(node, Node<K, V>, links) }.cast_mut();
> + // SAFETY: The passed `node` is the current node or a non-null neighbor,
> + // thus `this` is valid by the type invariants.
> + let n = unsafe { Box::from_raw(this) };
> +
> + (n.key, n.value)
> + }

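These internal methods (`to_key_value`, `to_key_value_mut` and
`to_key_value_owned`) should be marked `unsafe` and have a `# Safety`
section that states the requirement that `node` is a valid pointer to a node.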

--
Cheers,
Benno

> +}
> +
> +/// Direction for [`RBTreeCursor`] operations.
> +enum Direction {
> + /// the node immediately before, in sort order
> + Prev,
> + /// the node immediately after, in sort order
> + Next,
> +}
> +
> impl<'a, K, V> IntoIterator for &'a RBTree<K, V> {
> type Item = (&'a K, &'a V);
> type IntoIter = RBTreeIterator<'a, K, V>;
>
> --
> 2.44.0.rc0.258.g7320e95886-goog
>