[RFC PATCH 61/86] treewide: bpf: remove cond_resched()

From: Ankur Arora
Date: Tue Nov 07 2023 - 18:10:56 EST


There are broadly three sets of uses of cond_resched():

1. Calls to cond_resched() out of the goodness of our heart,
otherwise known as avoiding lockup splats.

2. Open coded variants of cond_resched_lock() which call
cond_resched().

3. Retry or error handling loops, where cond_resched() is used as a
quick alternative to spinning in a tight loop.
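
For illustration, the three patterns look roughly like this (sketches
only; process_entry(), try_operation() and the lock are made up for
the example, not taken from any particular subsystem):

	/* set-1: scheduling point dropped into a long running loop,
	 * purely to avoid softlockup splats.
	 */
	for (i = 0; i < nr_entries; i++) {
		process_entry(i);
		cond_resched();
	}

	/* set-2: open coded variant of cond_resched_lock(): drop the
	 * lock, give the scheduler a chance, reacquire.
	 */
	if (need_resched()) {
		spin_unlock(&lock);
		cond_resched();
		spin_lock(&lock);
	}

	/* set-3: retry loop using cond_resched() instead of spinning
	 * in a tight loop.
	 */
	while (!try_operation())
		cond_resched();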

When running under a full preemption model, cond_resched() reduces
to a NOP (not even a barrier), so removing it obviously cannot matter.

But considering only voluntary preemption models (for, say, code that
has been mostly tested under those), for set-1 and set-2 the
scheduler can now preempt kernel tasks running beyond their time
quanta anywhere they are preemptible() [1]. This removes any need
for these explicitly placed scheduling points.

The cond_resched() calls in set-3 are a little more difficult.
To start with, given its NOP character under full preemption, it
never actually saved us from a tight loop.
With voluntary preemption, it's not a NOP, but it might as well be --
for most workloads the scheduler does not have an interminable supply
of runnable tasks on the runqueue.

So, cond_resched() is useful for avoiding softlockup splats, but not
terribly good for error handling. Ideally, these should be replaced
with some kind of timed or event wait.
For now we use cond_resched_stall(), which tries to schedule if
possible, and executes a cpu_relax() if not.
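
As an illustration (device_ready() is a hypothetical helper, not code
from this patch), a set-3 style wait would go from:

	while (!device_ready(dev))
		cond_resched();

to:

	while (!device_ready(dev))
		cond_resched_stall();

which schedules when a reschedule is possible and falls back to
cpu_relax() otherwise, instead of silently degrading into a busy-wait.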

All the uses of cond_resched() here are from set-1, so we can trivially
remove them.

[1] https://lore.kernel.org/lkml/20231107215742.363031-1-ankur.a.arora@xxxxxxxxxx/

Cc: Alexei Starovoitov <ast@xxxxxxxxxx>
Cc: Daniel Borkmann <daniel@xxxxxxxxxxxxx>
Cc: Andrii Nakryiko <andrii@xxxxxxxxxx>
Cc: Martin KaFai Lau <martin.lau@xxxxxxxxx>
Cc: bpf@xxxxxxxxxxxxxxx
Signed-off-by: Ankur Arora <ankur.a.arora@xxxxxxxxxx>
---
kernel/bpf/arraymap.c | 3 ---
kernel/bpf/bpf_iter.c | 7 +------
kernel/bpf/btf.c | 9 ---------
kernel/bpf/cpumap.c | 2 --
kernel/bpf/hashtab.c | 7 -------
kernel/bpf/syscall.c | 3 ---
kernel/bpf/verifier.c | 5 -----
7 files changed, 1 insertion(+), 35 deletions(-)

diff --git a/kernel/bpf/arraymap.c b/kernel/bpf/arraymap.c
index 2058e89b5ddd..cb0d626038b4 100644
--- a/kernel/bpf/arraymap.c
+++ b/kernel/bpf/arraymap.c
@@ -25,7 +25,6 @@ static void bpf_array_free_percpu(struct bpf_array *array)

for (i = 0; i < array->map.max_entries; i++) {
free_percpu(array->pptrs[i]);
- cond_resched();
}
}

@@ -42,7 +41,6 @@ static int bpf_array_alloc_percpu(struct bpf_array *array)
return -ENOMEM;
}
array->pptrs[i] = ptr;
- cond_resched();
}

return 0;
@@ -423,7 +421,6 @@ static void array_map_free(struct bpf_map *map)

for_each_possible_cpu(cpu) {
bpf_obj_free_fields(map->record, per_cpu_ptr(pptr, cpu));
- cond_resched();
}
}
} else {
diff --git a/kernel/bpf/bpf_iter.c b/kernel/bpf/bpf_iter.c
index 96856f130cbf..dfb24f76ccf7 100644
--- a/kernel/bpf/bpf_iter.c
+++ b/kernel/bpf/bpf_iter.c
@@ -73,7 +73,7 @@ static inline bool bpf_iter_target_support_resched(const struct bpf_iter_target_
return tinfo->reg_info->feature & BPF_ITER_RESCHED;
}

-static bool bpf_iter_support_resched(struct seq_file *seq)
+static bool __maybe_unused bpf_iter_support_resched(struct seq_file *seq)
{
struct bpf_iter_priv_data *iter_priv;

@@ -97,7 +97,6 @@ static ssize_t bpf_seq_read(struct file *file, char __user *buf, size_t size,
struct seq_file *seq = file->private_data;
size_t n, offs, copied = 0;
int err = 0, num_objs = 0;
- bool can_resched;
void *p;

mutex_lock(&seq->lock);
@@ -150,7 +149,6 @@ static ssize_t bpf_seq_read(struct file *file, char __user *buf, size_t size,
goto done;
}

- can_resched = bpf_iter_support_resched(seq);
while (1) {
loff_t pos = seq->index;

@@ -196,9 +194,6 @@ static ssize_t bpf_seq_read(struct file *file, char __user *buf, size_t size,
}
break;
}
-
- if (can_resched)
- cond_resched();
}
stop:
offs = seq->count;
diff --git a/kernel/bpf/btf.c b/kernel/bpf/btf.c
index 8090d7fb11ef..fe560f80e230 100644
--- a/kernel/bpf/btf.c
+++ b/kernel/bpf/btf.c
@@ -5361,8 +5361,6 @@ btf_parse_struct_metas(struct bpf_verifier_log *log, struct btf *btf)
if (!__btf_type_is_struct(t))
continue;

- cond_resched();
-
for_each_member(j, t, member) {
if (btf_id_set_contains(&aof.set, member->type))
goto parse;
@@ -5427,8 +5425,6 @@ static int btf_check_type_tags(struct btf_verifier_env *env,
if (!btf_type_is_modifier(t))
continue;

- cond_resched();
-
in_tags = btf_type_is_type_tag(t);
while (btf_type_is_modifier(t)) {
if (!chain_limit--) {
@@ -8296,11 +8292,6 @@ bpf_core_add_cands(struct bpf_cand_cache *cands, const struct btf *targ_btf,
if (!targ_name)
continue;

- /* the resched point is before strncmp to make sure that search
- * for non-existing name will have a chance to schedule().
- */
- cond_resched();
-
if (strncmp(cands->name, targ_name, cands->name_len) != 0)
continue;

diff --git a/kernel/bpf/cpumap.c b/kernel/bpf/cpumap.c
index e42a1bdb7f53..0aed2a6ef262 100644
--- a/kernel/bpf/cpumap.c
+++ b/kernel/bpf/cpumap.c
@@ -290,8 +290,6 @@ static int cpu_map_kthread_run(void *data)
} else {
__set_current_state(TASK_RUNNING);
}
- } else {
- sched = cond_resched();
}

/*
diff --git a/kernel/bpf/hashtab.c b/kernel/bpf/hashtab.c
index a8c7e1c5abfa..17ed14d2dd44 100644
--- a/kernel/bpf/hashtab.c
+++ b/kernel/bpf/hashtab.c
@@ -142,7 +142,6 @@ static void htab_init_buckets(struct bpf_htab *htab)
raw_spin_lock_init(&htab->buckets[i].raw_lock);
lockdep_set_class(&htab->buckets[i].raw_lock,
&htab->lockdep_key);
- cond_resched();
}
}

@@ -232,7 +231,6 @@ static void htab_free_prealloced_timers(struct bpf_htab *htab)

elem = get_htab_elem(htab, i);
bpf_obj_free_timer(htab->map.record, elem->key + round_up(htab->map.key_size, 8));
- cond_resched();
}
}

@@ -255,13 +253,10 @@ static void htab_free_prealloced_fields(struct bpf_htab *htab)

for_each_possible_cpu(cpu) {
bpf_obj_free_fields(htab->map.record, per_cpu_ptr(pptr, cpu));
- cond_resched();
}
} else {
bpf_obj_free_fields(htab->map.record, elem->key + round_up(htab->map.key_size, 8));
- cond_resched();
}
- cond_resched();
}
}

@@ -278,7 +273,6 @@ static void htab_free_elems(struct bpf_htab *htab)
pptr = htab_elem_get_ptr(get_htab_elem(htab, i),
htab->map.key_size);
free_percpu(pptr);
- cond_resched();
}
free_elems:
bpf_map_area_free(htab->elems);
@@ -337,7 +331,6 @@ static int prealloc_init(struct bpf_htab *htab)
goto free_elems;
htab_elem_set_ptr(get_htab_elem(htab, i), htab->map.key_size,
pptr);
- cond_resched();
}

skip_percpu_elems:
diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index d77b2f8b9364..8762c3d678be 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -1695,7 +1695,6 @@ int generic_map_delete_batch(struct bpf_map *map,
bpf_enable_instrumentation();
if (err)
break;
- cond_resched();
}
if (copy_to_user(&uattr->batch.count, &cp, sizeof(cp)))
err = -EFAULT;
@@ -1752,7 +1751,6 @@ int generic_map_update_batch(struct bpf_map *map, struct file *map_file,

if (err)
break;
- cond_resched();
}

if (copy_to_user(&uattr->batch.count, &cp, sizeof(cp)))
@@ -1849,7 +1847,6 @@ int generic_map_lookup_batch(struct bpf_map *map,
swap(prev_key, key);
retry = MAP_LOOKUP_RETRIES;
cp++;
- cond_resched();
}

if (err == -EFAULT)
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index 873ade146f3d..25e6f318c561 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -16489,9 +16489,6 @@ static int do_check(struct bpf_verifier_env *env)
if (signal_pending(current))
return -EAGAIN;

- if (need_resched())
- cond_resched();
-
if (env->log.level & BPF_LOG_LEVEL2 && do_print_state) {
verbose(env, "\nfrom %d to %d%s:",
env->prev_insn_idx, env->insn_idx,
@@ -18017,7 +18014,6 @@ static int jit_subprogs(struct bpf_verifier_env *env)
err = -ENOTSUPP;
goto out_free;
}
- cond_resched();
}

/* at this point all bpf functions were successfully JITed
@@ -18061,7 +18057,6 @@ static int jit_subprogs(struct bpf_verifier_env *env)
err = -ENOTSUPP;
goto out_free;
}
- cond_resched();
}

/* finally lock prog and jit images for all functions and
--
2.31.1