[RFC PATCH 06/11] rust: apply cache line padding for `SpinLock`

From: Andreas Hindborg
Date: Wed May 03 2023 - 05:07:50 EST


From: Andreas Hindborg <a.hindborg@xxxxxxxxxxx>

The kernel `struct spinlock` is 4 bytes on x86 when lockdep is not enabled, and
the structure is not padded to fill a cache line. The effect of this for
`SpinLock` is that the lock variable and the value protected by the lock may
share a cache line, depending on the alignment requirements of the protected
value. Aligning both the lock variable and the protected value to a cache line
yields a 20% performance increase for the Rust null block driver for sequential
reads to memory-backed devices with 6 concurrent readers.

Signed-off-by: Andreas Hindborg <a.hindborg@xxxxxxxxxxx>
---
rust/kernel/cache_padded.rs | 33 +++++++++++++++++++++++++++++++
rust/kernel/lib.rs | 2 ++
rust/kernel/sync/lock.rs | 9 ++++++---
rust/kernel/sync/lock/spinlock.rs | 13 ++++++++----
4 files changed, 50 insertions(+), 7 deletions(-)
create mode 100644 rust/kernel/cache_padded.rs

diff --git a/rust/kernel/cache_padded.rs b/rust/kernel/cache_padded.rs
new file mode 100644
index 000000000000..758678e71f50
--- /dev/null
+++ b/rust/kernel/cache_padded.rs
@@ -0,0 +1,33 @@
+// SPDX-License-Identifier: GPL-2.0
+
+/// Aligns the wrapped `T` to 64 bytes, which also pads its size to a multiple of 64.
+///
+/// `Send`/`Sync` are auto traits and follow `T` through the only field; no `unsafe impl` needed.
+#[repr(align(64))]
+pub struct CachePadded<T: ?Sized> {
+    value: T,
+}
+
+impl<T> CachePadded<T> {
+    /// Wraps `t` so that it is stored 64-byte aligned and padded.
+    #[inline(always)]
+    pub(crate) const fn new(t: T) -> Self {
+        Self { value: t }
+    }
+}
+
+impl<T: ?Sized> core::ops::Deref for CachePadded<T> {
+    type Target = T;
+    /// Borrows the wrapped value.
+    #[inline(always)]
+    fn deref(&self) -> &Self::Target {
+        &self.value
+    }
+}
+
+impl<T: ?Sized> core::ops::DerefMut for CachePadded<T> {
+    #[inline(always)]
+    fn deref_mut(&mut self) -> &mut Self::Target {
+        &mut self.value
+    }
+}
diff --git a/rust/kernel/lib.rs b/rust/kernel/lib.rs
index a0bd0b0e2aef..426e2dea0da6 100644
--- a/rust/kernel/lib.rs
+++ b/rust/kernel/lib.rs
@@ -37,6 +37,7 @@ extern crate self as kernel;
mod allocator;
pub mod block;
mod build_assert;
+mod cache_padded;
pub mod error;
pub mod init;
pub mod ioctl;
@@ -56,6 +57,7 @@ pub mod types;

#[doc(hidden)]
pub use bindings;
+pub(crate) use cache_padded::CachePadded;
pub use macros;
pub use uapi;

diff --git a/rust/kernel/sync/lock.rs b/rust/kernel/sync/lock.rs
index a2216325632d..1c584b1df30d 100644
--- a/rust/kernel/sync/lock.rs
+++ b/rust/kernel/sync/lock.rs
@@ -6,7 +6,9 @@
//! spinlocks, raw spinlocks) to be provided with minimal effort.

use super::LockClassKey;
-use crate::{bindings, init::PinInit, pin_init, str::CStr, types::Opaque, types::ScopeGuard};
+use crate::{
+ bindings, init::PinInit, pin_init, str::CStr, types::Opaque, types::ScopeGuard, CachePadded,
+};
use core::{cell::UnsafeCell, marker::PhantomData, marker::PhantomPinned};
use macros::pin_data;

@@ -87,7 +89,7 @@ pub struct Lock<T: ?Sized, B: Backend> {
_pin: PhantomPinned,

/// The data protected by the lock.
- pub(crate) data: UnsafeCell<T>,
+ pub(crate) data: CachePadded<UnsafeCell<T>>,
}

// SAFETY: `Lock` can be transferred across thread boundaries iff the data it protects can.
@@ -102,7 +104,7 @@ impl<T, B: Backend> Lock<T, B> {
#[allow(clippy::new_ret_no_self)]
pub fn new(t: T, name: &'static CStr, key: &'static LockClassKey) -> impl PinInit<Self> {
pin_init!(Self {
- data: UnsafeCell::new(t),
+ data: CachePadded::new(UnsafeCell::new(t)),
_pin: PhantomPinned,
// SAFETY: `slot` is valid while the closure is called and both `name` and `key` have
// static lifetimes so they live indefinitely.
@@ -115,6 +117,7 @@ impl<T, B: Backend> Lock<T, B> {

impl<T: ?Sized, B: Backend> Lock<T, B> {
/// Acquires the lock and gives the caller access to the data protected by it.
+ #[inline(always)]
pub fn lock(&self) -> Guard<'_, T, B> {
// SAFETY: The constructor of the type calls `init`, so the existence of the object proves
// that `init` was called.
diff --git a/rust/kernel/sync/lock/spinlock.rs b/rust/kernel/sync/lock/spinlock.rs
index 979b56464a4e..e39142a8148c 100644
--- a/rust/kernel/sync/lock/spinlock.rs
+++ b/rust/kernel/sync/lock/spinlock.rs
@@ -4,7 +4,10 @@
//!
//! This module allows Rust code to use the kernel's `spinlock_t`.

+use core::ops::DerefMut;
+
use crate::bindings;
+use crate::CachePadded;

/// Creates a [`SpinLock`] initialiser with the given name and a newly-created lock class.
///
@@ -90,7 +93,7 @@ pub struct SpinLockBackend;
// SAFETY: The underlying kernel `spinlock_t` object ensures mutual exclusion. `relock` uses the
// default implementation that always calls the same locking method.
unsafe impl super::Backend for SpinLockBackend {
- type State = bindings::spinlock_t;
+ type State = CachePadded<bindings::spinlock_t>;
type GuardState = ();

unsafe fn init(
@@ -100,18 +103,20 @@ unsafe impl super::Backend for SpinLockBackend {
) {
// SAFETY: The safety requirements ensure that `ptr` is valid for writes, and `name` and
// `key` are valid for read indefinitely.
- unsafe { bindings::__spin_lock_init(ptr, name, key) }
+ unsafe { bindings::__spin_lock_init((&mut *ptr).deref_mut(), name, key) }
}

+ #[inline(always)]
unsafe fn lock(ptr: *mut Self::State) -> Self::GuardState {
// SAFETY: The safety requirements of this function ensure that `ptr` points to valid
// memory, and that it has been initialised before.
- unsafe { bindings::spin_lock(ptr) }
+ unsafe { bindings::spin_lock((&mut *ptr).deref_mut()) }
}

+ #[inline(always)]
unsafe fn unlock(ptr: *mut Self::State, _guard_state: &Self::GuardState) {
// SAFETY: The safety requirements of this function ensure that `ptr` is valid and that the
// caller is the owner of the mutex.
- unsafe { bindings::spin_unlock(ptr) }
+ unsafe { bindings::spin_unlock((&mut *ptr).deref_mut()) }
}
}
--
2.40.0