[PATCH 16/15] futex,selftests: Extend the futex selftests for NUMA

From: Peter Zijlstra
Date: Fri Sep 22 2023 - 16:59:56 EST


XXX

Signed-off-by: Peter Zijlstra (Intel) <peterz@xxxxxxxxxxxxx>
---
tools/testing/selftests/futex/functional/Makefile | 3
tools/testing/selftests/futex/functional/futex_numa.c | 262 ++++++++++++++++++
2 files changed, 264 insertions(+), 1 deletion(-)

--- a/tools/testing/selftests/futex/functional/Makefile
+++ b/tools/testing/selftests/futex/functional/Makefile
@@ -17,7 +17,8 @@ TEST_GEN_PROGS := \
futex_wait_private_mapped_file \
futex_wait \
futex_requeue \
- futex_waitv
+ futex_waitv \
+ futex_numa

TEST_PROGS := run.sh

--- /dev/null
+++ b/tools/testing/selftests/futex/functional/futex_numa.c
@@ -0,0 +1,262 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <pthread.h>
+#include <sys/shm.h>
+#include <sys/mman.h>
+#include <fcntl.h>
+#include <stdbool.h>
+#include <time.h>
+#include <assert.h>
+#include "logging.h"
+#include "futextest.h"
+#include "futex2test.h"
+
+typedef u_int32_t u32;
+typedef int32_t s32;
+typedef u_int64_t u64;
+
+static int fflags = (FUTEX2_SIZE_U32 | FUTEX2_PRIVATE);
+static int fnode = FUTEX_NO_NODE;
+
+/* fairly stupid test-and-set lock with a waiter flag */
+
+#define N_LOCK 0x0000001
+#define N_WAITERS 0x0001000
+
+struct futex_numa_32 {
+ union {
+ u64 full;
+ struct {
+ u32 val;
+ u32 node;
+ };
+ };
+};
+
+void futex_numa_32_lock(struct futex_numa_32 *lock)
+{
+ for (;;) {
+ struct futex_numa_32 new, old = {
+ .full = __atomic_load_n(&lock->full, __ATOMIC_RELAXED),
+ };
+
+ for (;;) {
+ new = old;
+ if (old.val == 0) {
+ /* no waiter, no lock -> first lock, set no-node */
+ new.node = fnode;
+ }
+ if (old.val & N_LOCK) {
+ /* contention, set waiter */
+ new.val |= N_WAITERS;
+ }
+ new.val |= N_LOCK;
+
+ /* nothing changed, ready to block */
+ if (old.full == new.full)
+ break;
+
+ /*
+ * Use u64 cmpxchg to set the futex value and node in a
+ * consistent manner.
+ */
+ if (__atomic_compare_exchange_n(&lock->full,
+ &old.full, new.full,
+ /* .weak */ false,
+ __ATOMIC_ACQUIRE,
+ __ATOMIC_RELAXED)) {
+
+ /* if we just set N_LOCK, we own it */
+ if (!(old.val & N_LOCK))
+ return;
+
+ /* go block */
+ break;
+ }
+ }
+
+ futex2_wait(lock, new.val, ~0U, fflags, NULL, 0);
+ }
+}
+
+void futex_numa_32_unlock(struct futex_numa_32 *lock)
+{
+ u32 val = __atomic_sub_fetch(&lock->val, N_LOCK, __ATOMIC_RELEASE);
+ assert((s32)val >= 0);
+ if (val & N_WAITERS) {
+ int woken = futex2_wake(lock, ~0U, 1, fflags);
+ assert(val == N_WAITERS);
+ if (!woken) {
+ __atomic_compare_exchange_n(&lock->val, &val, 0U,
+ false, __ATOMIC_RELAXED,
+ __ATOMIC_RELAXED);
+ }
+ }
+}
+
+static long nanos = 50000;
+
+struct thread_args {
+ pthread_t tid;
+ volatile int * done;
+ struct futex_numa_32 *lock;
+ int val;
+ int *val1, *val2;
+ int node;
+};
+
+static void *threadfn(void *_arg)
+{
+ struct thread_args *args = _arg;
+ struct timespec ts = {
+ .tv_nsec = nanos,
+ };
+ int node;
+
+ while (!*args->done) {
+
+ futex_numa_32_lock(args->lock);
+ args->val++;
+
+ assert(*args->val1 == *args->val2);
+ (*args->val1)++;
+ nanosleep(&ts, NULL);
+ (*args->val2)++;
+
+ node = args->lock->node;
+ futex_numa_32_unlock(args->lock);
+
+ if (node != args->node) {
+ args->node = node;
+ printf("node: %d\n", node);
+ }
+
+ nanosleep(&ts, NULL);
+ }
+
+ return NULL;
+}
+
+static void *contendfn(void *_arg)
+{
+ struct thread_args *args = _arg;
+
+ while (!*args->done) {
+ /*
+ * futex2_wait() will take hb-lock, verify *var == val and
+ * queue/abort. By knowingly setting val 'wrong' this will
+ * abort and thereby generate hb-lock contention.
+ */
+ futex2_wait(&args->lock->val, ~0U, ~0U, fflags, NULL, 0);
+ args->val++;
+ }
+
+ return NULL;
+}
+
+static volatile int done = 0;
+static struct futex_numa_32 lock = { .val = 0, };
+static int val1, val2;
+
+int main(int argc, char *argv[])
+{
+ struct thread_args *tas[512], *cas[512];
+ int c, t, threads = 2, contenders = 0;
+ int sleeps = 10;
+ int total = 0;
+
+ while ((c = getopt(argc, argv, "c:t:s:n:N::")) != -1) {
+ switch (c) {
+ case 'c':
+ contenders = atoi(optarg);
+ break;
+ case 't':
+ threads = atoi(optarg);
+ break;
+ case 's':
+ sleeps = atoi(optarg);
+ break;
+ case 'n':
+ nanos = atoi(optarg);
+ break;
+ case 'N':
+ fflags |= FUTEX2_NUMA;
+ if (optarg)
+ fnode = atoi(optarg);
+ break;
+ default:
+ exit(1);
+ break;
+ }
+ }
+
+ for (t = 0; t < contenders; t++) {
+ struct thread_args *args = calloc(1, sizeof(*args));
+ if (!args) {
+ perror("thread_args");
+ exit(-1);
+ }
+
+ args->done = &done;
+ args->lock = &lock;
+ args->val1 = &val1;
+ args->val2 = &val2;
+ args->node = -1;
+
+ if (pthread_create(&args->tid, NULL, contendfn, args)) {
+ perror("pthread_create");
+ exit(-1);
+ }
+
+ cas[t] = args;
+ }
+
+ for (t = 0; t < threads; t++) {
+ struct thread_args *args = calloc(1, sizeof(*args));
+ if (!args) {
+ perror("thread_args");
+ exit(-1);
+ }
+
+ args->done = &done;
+ args->lock = &lock;
+ args->val1 = &val1;
+ args->val2 = &val2;
+ args->node = -1;
+
+ if (pthread_create(&args->tid, NULL, threadfn, args)) {
+ perror("pthread_create");
+ exit(-1);
+ }
+
+ tas[t] = args;
+ }
+
+ sleep(sleeps);
+
+ done = true;
+
+ for (t = 0; t < threads; t++) {
+ struct thread_args *args = tas[t];
+
+ pthread_join(args->tid, NULL);
+ total += args->val;
+// printf("tval: %d\n", args->val);
+ }
+ printf("total: %d\n", total);
+
+ if (contenders) {
+ total = 0;
+ for (t = 0; t < contenders; t++) {
+ struct thread_args *args = cas[t];
+
+ pthread_join(args->tid, NULL);
+ total += args->val;
+// printf("tval: %d\n", args->val);
+ }
+ printf("contenders: %d\n", total);
+ }
+
+ return 0;
+}
+