Re: [PATCH v1 11/14] futex: Implement FUTEX2_NUMA

From: Thomas Gleixner
Date: Mon Jul 31 2023 - 13:36:28 EST


On Fri, Jul 21 2023 at 12:22, Peter Zijlstra wrote:
> struct futex_hash_bucket *futex_hash(union futex_key *key)
> {
> - u32 hash = jhash2((u32 *)key, offsetof(typeof(*key), both.offset) / 4,
> + u32 hash = jhash2((u32 *)key,
> + offsetof(typeof(*key), both.offset) / sizeof(u32),
> key->both.offset);
> + int node = key->both.node;
>
> - return &futex_queues[hash & (futex_hashsize - 1)];
> + if (node == -1) {
> + /*
> + * In case of !FLAGS_NUMA, use some unused hash bits to pick a
> + * node -- this ensures regular futexes are interleaved across
> + * the nodes and avoids having to allocate multiple
> + * hash-tables.
> + *
> + * NOTE: this isn't perfectly uniform, but it is fast and
> + * handles sparse node masks.
> + */
> + node = (hash >> futex_hashshift) % nr_node_ids;

Is nr_node_ids guaranteed to be stable after init? It's marked
__read_mostly, but not __ro_after_init.

> + if (!node_possible(node)) {
> + node = find_next_bit_wrap(node_possible_map.bits,
> + nr_node_ids, node);
> + }
> + }
> +
> + return &futex_queues[node][hash & (futex_hashsize - 1)];
> }
> fshared = flags & FLAGS_SHARED;
> + size = futex_size(flags);
>
> /*
> * The futex address must be "naturally" aligned.
> */
> key->both.offset = address % PAGE_SIZE;
> - if (unlikely((address % sizeof(u32)) != 0))
> + if (unlikely((address % size) != 0))
> return -EINVAL;

Hmm. Shouldn't that check already have changed when the 1 and 2 byte
futexes were allowed?

> address -= key->both.offset;
>
> - if (unlikely(!access_ok(uaddr, sizeof(u32))))
> + if (flags & FLAGS_NUMA)
> + size *= 2;
> +
> + if (unlikely(!access_ok(uaddr, size)))
> return -EFAULT;
>
> if (unlikely(should_fail_futex(fshared)))
> return -EFAULT;
>
> + key->both.node = -1;

Please put this into an else path.

> + if (flags & FLAGS_NUMA) {
> + void __user *naddr = uaddr + size/2;

Nit: spaces around the operator, please: size / 2;

> +
> + if (futex_get_value(&node, naddr, flags))
> + return -EFAULT;
> +
> + if (node == -1) {
> + node = numa_node_id();
> + if (futex_put_value(node, naddr, flags))
> + return -EFAULT;
> + }
> +
> + if (node >= MAX_NUMNODES || !node_possible(node))
> + return -EINVAL;

That's clearly an else path too. No point in checking whether
numa_node_id() is valid.

> + key->both.node = node;
> + }
>
> +static inline unsigned int futex_size(unsigned int flags)
> +{
> + unsigned int size = flags & FLAGS_SIZE_MASK;
> + return 1 << size; /* {0,1,2,3} -> {1,2,4,8} */
> +}
> +
> static inline bool futex_flags_valid(unsigned int flags)
> {
> /* Only 64bit futexes for 64bit code */
> @@ -77,13 +83,19 @@ static inline bool futex_flags_valid(uns
> if ((flags & FLAGS_SIZE_MASK) != FLAGS_SIZE_32)
> return false;
>
> - return true;
> -}
> + /*
> + * Must be able to represent both NUMA_NO_NODE and every valid nodeid
> + * in a futex word.
> + */
> + if (flags & FLAGS_NUMA) {
> + int bits = 8 * futex_size(flags);
> + u64 max = ~0ULL;
> + max >>= 64 - bits;

Your newline key is broken, right?

> + if (nr_node_ids >= max)
> + return false;
> + }

Thanks,

tglx