Re: [PATCH bpf-next v3 3/7] bpf: Introduce task open coded iterator kfuncs

From: Andrii Nakryiko
Date: Wed Sep 27 2023 - 19:21:02 EST


On Mon, Sep 25, 2023 at 3:56 AM Chuyi Zhou <zhouchuyi@xxxxxxxxxxxxx> wrote:
>
> This patch adds kfuncs bpf_iter_task_{new,next,destroy} which allow
> creation and manipulation of struct bpf_iter_task in open-coded iterator
> style. BPF programs can use these kfuncs or through bpf_for_each macro to
> iterate all processes in the system.
>
> The API design keep consistent with SEC("iter/task"). bpf_iter_task_new()
> accepts a specific task and iterating type which allows:
> 1. iterating all process in the system
>
> 2. iterating all threads in the system
>
> 3. iterating all threads of a specific task
> Here we also resuse enum bpf_iter_task_type and rename BPF_TASK_ITER_TID
> to BPF_TASK_ITER_THREAD, rename BPF_TASK_ITER_TGID to BPF_TASK_ITER_PROC.
>
> The newly-added struct bpf_iter_task has a name collision with a selftest
> for the seq_file task iter's bpf skel, so the selftests/bpf/progs file is
> renamed in order to avoid the collision.
>
> Signed-off-by: Chuyi Zhou <zhouchuyi@xxxxxxxxxxxxx>
> ---
> include/linux/bpf.h | 8 +-
> kernel/bpf/helpers.c | 3 +
> kernel/bpf/task_iter.c | 96 ++++++++++++++++---
> .../testing/selftests/bpf/bpf_experimental.h | 5 +
> .../selftests/bpf/prog_tests/bpf_iter.c | 18 ++--
> .../{bpf_iter_task.c => bpf_iter_tasks.c} | 0
> 6 files changed, 106 insertions(+), 24 deletions(-)
> rename tools/testing/selftests/bpf/progs/{bpf_iter_task.c => bpf_iter_tasks.c} (100%)
>

[...]

> @@ -692,9 +692,9 @@ static int bpf_iter_fill_link_info(const struct bpf_iter_aux_info *aux, struct b
> static void bpf_iter_task_show_fdinfo(const struct bpf_iter_aux_info *aux, struct seq_file *seq)
> {
> seq_printf(seq, "task_type:\t%s\n", iter_task_type_names[aux->task.type]);
> - if (aux->task.type == BPF_TASK_ITER_TID)
> + if (aux->task.type == BPF_TASK_ITER_THREAD)
> seq_printf(seq, "tid:\t%u\n", aux->task.pid);
> - else if (aux->task.type == BPF_TASK_ITER_TGID)
> + else if (aux->task.type == BPF_TASK_ITER_PROC)
> seq_printf(seq, "pid:\t%u\n", aux->task.pid);
> }
>
> @@ -856,6 +856,80 @@ __bpf_kfunc void bpf_iter_css_task_destroy(struct bpf_iter_css_task *it)
> bpf_mem_free(&bpf_global_ma, kit->css_it);
> }
>
> +struct bpf_iter_task {
> + __u64 __opaque[2];
> + __u32 __opaque_int[1];

this should be __u64 __opaque[3], because struct takes full 24 bytes

> +} __attribute__((aligned(8)));
> +
> +struct bpf_iter_task_kern {
> + struct task_struct *task;
> + struct task_struct *pos;
> + unsigned int type;
> +} __attribute__((aligned(8)));
> +
> +__bpf_kfunc int bpf_iter_task_new(struct bpf_iter_task *it, struct task_struct *task, unsigned int type)

nit: type -> flags, so we can add a bit more stuff, if necessary

> +{
> + struct bpf_iter_task_kern *kit = (void *)it;

empty line after variable declarations

> + BUILD_BUG_ON(sizeof(struct bpf_iter_task_kern) != sizeof(struct bpf_iter_task));
> + BUILD_BUG_ON(__alignof__(struct bpf_iter_task_kern) !=
> + __alignof__(struct bpf_iter_task));

and I'd add empty line here to keep BUILD_BUG_ON block separate

> + kit->task = kit->pos = NULL;
> + switch (type) {
> + case BPF_TASK_ITER_ALL:
> + case BPF_TASK_ITER_PROC:
> + case BPF_TASK_ITER_THREAD:
> + break;
> + default:
> + return -EINVAL;
> + }
> +
> + if (type == BPF_TASK_ITER_THREAD)
> + kit->task = task;
> + else
> + kit->task = &init_task;
> + kit->pos = kit->task;
> + kit->type = type;
> + return 0;
> +}
> +
> +__bpf_kfunc struct task_struct *bpf_iter_task_next(struct bpf_iter_task *it)
> +{
> + struct bpf_iter_task_kern *kit = (void *)it;
> + struct task_struct *pos;
> + unsigned int type;
> +
> + type = kit->type;
> + pos = kit->pos;
> +
> + if (!pos)
> + goto out;
> +
> + if (type == BPF_TASK_ITER_PROC)
> + goto get_next_task;
> +
> + kit->pos = next_thread(kit->pos);
> + if (kit->pos == kit->task) {
> + if (type == BPF_TASK_ITER_THREAD) {
> + kit->pos = NULL;
> + goto out;
> + }
> + } else
> + goto out;
> +
> +get_next_task:
> + kit->pos = next_task(kit->pos);
> + kit->task = kit->pos;
> + if (kit->pos == &init_task)
> + kit->pos = NULL;

I can't say I completely follow the logic (e.g., for
BPF_TASK_ITER_PROC, why do we do next_task() on first next() call)?
Can you elabore the expected behavior for various combinations of
types and starting task argument?

> +
> +out:
> + return pos;
> +}
> +
> +__bpf_kfunc void bpf_iter_task_destroy(struct bpf_iter_task *it)
> +{
> +}
> +
> DEFINE_PER_CPU(struct mmap_unlock_irq_work, mmap_unlock_work);
>
> static void do_mmap_read_unlock(struct irq_work *entry)
> diff --git a/tools/testing/selftests/bpf/bpf_experimental.h b/tools/testing/selftests/bpf/bpf_experimental.h
> index d3ea90f0e142..d989775dbdb5 100644
> --- a/tools/testing/selftests/bpf/bpf_experimental.h
> +++ b/tools/testing/selftests/bpf/bpf_experimental.h
> @@ -169,4 +169,9 @@ extern int bpf_iter_css_task_new(struct bpf_iter_css_task *it,
> extern struct task_struct *bpf_iter_css_task_next(struct bpf_iter_css_task *it) __weak __ksym;
> extern void bpf_iter_css_task_destroy(struct bpf_iter_css_task *it) __weak __ksym;
>
> +struct bpf_iter_task;
> +extern int bpf_iter_task_new(struct bpf_iter_task *it, struct task_struct *task, unsigned int type) __weak __ksym;
> +extern struct task_struct *bpf_iter_task_next(struct bpf_iter_task *it) __weak __ksym;
> +extern void bpf_iter_task_destroy(struct bpf_iter_task *it) __weak __ksym;
> +
> #endif
> diff --git a/tools/testing/selftests/bpf/prog_tests/bpf_iter.c b/tools/testing/selftests/bpf/prog_tests/bpf_iter.c

please split out selftests changes from kernel-side changes. We only
combine them if kernel changes break selftests, preventing bisection.

[...]