Re: [PATCH RFC bpf-next v3 04/16] bpf/helpers: introduce sleepable bpf_timers

From: Alexei Starovoitov
Date: Thu Feb 22 2024 - 15:48:03 EST


On Wed, Feb 21, 2024 at 8:25 AM Benjamin Tissoires <bentiss@xxxxxxxxxx> wrote:
> /* the actual struct hidden inside uapi struct bpf_timer */
> @@ -1113,6 +1120,55 @@ struct bpf_timer_kern {
> struct bpf_spin_lock lock;
> } __attribute__((aligned(8)));
>
> +static u32 __bpf_timer_compute_key(struct bpf_hrtimer *timer)
> +{
> + struct bpf_map *map = timer->map;
> + void *value = timer->value;
> +
> + if (map->map_type == BPF_MAP_TYPE_ARRAY) {
> + struct bpf_array *array = container_of(map, struct bpf_array, map);
> +
> + /* compute the key */
> + return ((char *)value - array->value) / array->elem_size;
> + }
> +
> + /* hash or lru */
> + return *(u32 *)(value - round_up(map->key_size, 8));
> +}
> +
> +static void bpf_timer_work_cb(struct work_struct *work)
> +{
> + struct bpf_hrtimer *t = container_of(work, struct bpf_hrtimer, work);
> + struct bpf_map *map = t->map;
> + void *value = t->value;
> + bpf_callback_t callback_fn;
> + u32 key;
> +
> + BTF_TYPE_EMIT(struct bpf_timer);
> +
> + down(&t->sleepable_lock);
> +
> + callback_fn = READ_ONCE(t->callback_fn);
> + if (!callback_fn) {
> + up(&t->sleepable_lock);
> + return;
> + }
> +
> + key = __bpf_timer_compute_key(t);
> +
> +
> + callback_fn((u64)(long)map, (u64)(long)&key, (u64)(long)value, 0, 0);
> + /* The verifier checked that return value is zero. */
> +
> + bpf_prog_put(t->prog);
> +}
> +
> static DEFINE_PER_CPU(struct bpf_hrtimer *, hrtimer_running);
>
> static enum hrtimer_restart bpf_timer_cb(struct hrtimer *hrtimer)
> @@ -1121,8 +1177,7 @@ static enum hrtimer_restart bpf_timer_cb(struct hrtimer *hrtimer)
> struct bpf_map *map = t->map;
> void *value = t->value;
> bpf_callback_t callback_fn;
> - void *key;
> - u32 idx;
> + u32 key;
>
> BTF_TYPE_EMIT(struct bpf_timer);
> callback_fn = rcu_dereference_check(t->callback_fn, rcu_read_lock_bh_held());
> @@ -1136,17 +1191,9 @@ static enum hrtimer_restart bpf_timer_cb(struct hrtimer *hrtimer)
> * bpf_map_delete_elem() on the same timer.
> */
> this_cpu_write(hrtimer_running, t);
> - if (map->map_type == BPF_MAP_TYPE_ARRAY) {
> - struct bpf_array *array = container_of(map, struct bpf_array, map);
> -
> - /* compute the key */
> - idx = ((char *)value - array->value) / array->elem_size;
> - key = &idx;
> - } else { /* hash or lru */
> - key = value - round_up(map->key_size, 8);
> - }
> + key = __bpf_timer_compute_key(t);

Please don't mix such "cleanup" with main changes.
It's buggy for a hash map.
Instead of passing a pointer to the real key into the bpf prog,
you're reading the first 4 bytes of the key, copying them into a temp var,
and passing the address of that temp var.
Such a bug would have been very painful to debug if it had slipped through,
since the bpf prog would sort of work for 4-byte keys.