[RFC PATCH v3 1/3] bpf: Add map tracing functions and call sites

From: Joe Burton
Date: Mon Nov 01 2021 - 22:14:57 EST


From: Joe Burton <jevburton@xxxxxxxxxx>

Add two functions that fentry/fexit/fmod_ret programs can attach to:
bpf_map_trace_update_elem
bpf_map_trace_delete_elem
These functions have the same arguments as
bpf_map_{update,delete}_elem.

Invoke these functions from the following map types:
BPF_MAP_TYPE_ARRAY
BPF_MAP_TYPE_PERCPU_ARRAY
BPF_MAP_TYPE_HASH
BPF_MAP_TYPE_PERCPU_HASH
BPF_MAP_TYPE_LRU_HASH
BPF_MAP_TYPE_LRU_PERCPU_HASH

The only guarantee about these functions is that they are invoked
before the corresponding action occurs. Other conditions may prevent
the corresponding action from occurring after the function is invoked.

Signed-off-by: Joe Burton <jevburton@xxxxxxxxxx>
---
kernel/bpf/Makefile | 2 +-
kernel/bpf/arraymap.c | 6 ++++++
kernel/bpf/hashtab.c | 25 +++++++++++++++++++++++++
kernel/bpf/map_trace.c | 25 +++++++++++++++++++++++++
kernel/bpf/map_trace.h | 18 ++++++++++++++++++
5 files changed, 75 insertions(+), 1 deletion(-)
create mode 100644 kernel/bpf/map_trace.c
create mode 100644 kernel/bpf/map_trace.h

diff --git a/kernel/bpf/Makefile b/kernel/bpf/Makefile
index cf6ca339f3cd..03ab5c058e73 100644
--- a/kernel/bpf/Makefile
+++ b/kernel/bpf/Makefile
@@ -9,7 +9,7 @@ CFLAGS_core.o += $(call cc-disable-warning, override-init) $(cflags-nogcse-yy)
obj-$(CONFIG_BPF_SYSCALL) += syscall.o verifier.o inode.o helpers.o tnum.o bpf_iter.o map_iter.o task_iter.o prog_iter.o
obj-$(CONFIG_BPF_SYSCALL) += hashtab.o arraymap.o percpu_freelist.o bpf_lru_list.o lpm_trie.o map_in_map.o bloom_filter.o
obj-$(CONFIG_BPF_SYSCALL) += local_storage.o queue_stack_maps.o ringbuf.o
-obj-$(CONFIG_BPF_SYSCALL) += bpf_local_storage.o bpf_task_storage.o
+obj-$(CONFIG_BPF_SYSCALL) += bpf_local_storage.o bpf_task_storage.o map_trace.o
obj-${CONFIG_BPF_LSM} += bpf_inode_storage.o
obj-$(CONFIG_BPF_SYSCALL) += disasm.o
obj-$(CONFIG_BPF_JIT) += trampoline.o
diff --git a/kernel/bpf/arraymap.c b/kernel/bpf/arraymap.c
index 5e1ccfae916b..a0b4f1769e17 100644
--- a/kernel/bpf/arraymap.c
+++ b/kernel/bpf/arraymap.c
@@ -13,6 +13,7 @@
#include <linux/rcupdate_trace.h>

#include "map_in_map.h"
+#include "map_trace.h"

#define ARRAY_CREATE_FLAG_MASK \
(BPF_F_NUMA_NODE | BPF_F_MMAPABLE | BPF_F_ACCESS_MASK | \
@@ -300,6 +301,7 @@ static int array_map_update_elem(struct bpf_map *map, void *key, void *value,
struct bpf_array *array = container_of(map, struct bpf_array, map);
u32 index = *(u32 *)key;
char *val;
+ int err;

if (unlikely((map_flags & ~BPF_F_LOCK) > BPF_EXIST))
/* unknown flags */
@@ -317,6 +319,10 @@ static int array_map_update_elem(struct bpf_map *map, void *key, void *value,
!map_value_has_spin_lock(map)))
return -EINVAL;

+ err = bpf_map_trace_update_elem(map, key, value, map_flags);
+ if (unlikely(err))
+ return err;
+
if (array->map.map_type == BPF_MAP_TYPE_PERCPU_ARRAY) {
memcpy(this_cpu_ptr(array->pptrs[index & array->index_mask]),
value, map->value_size);
diff --git a/kernel/bpf/hashtab.c b/kernel/bpf/hashtab.c
index d29af9988f37..c1816a615d82 100644
--- a/kernel/bpf/hashtab.c
+++ b/kernel/bpf/hashtab.c
@@ -13,6 +13,7 @@
#include "percpu_freelist.h"
#include "bpf_lru_list.h"
#include "map_in_map.h"
+#include "map_trace.h"

#define HTAB_CREATE_FLAG_MASK \
(BPF_F_NO_PREALLOC | BPF_F_NO_COMMON_LRU | BPF_F_NUMA_NODE | \
@@ -1041,6 +1042,10 @@ static int htab_map_update_elem(struct bpf_map *map, void *key, void *value,
b = __select_bucket(htab, hash);
head = &b->head;

+ ret = bpf_map_trace_update_elem(map, key, value, map_flags);
+ if (unlikely(ret))
+ return ret;
+
if (unlikely(map_flags & BPF_F_LOCK)) {
if (unlikely(!map_value_has_spin_lock(map)))
return -EINVAL;
@@ -1133,6 +1138,10 @@ static int htab_lru_map_update_elem(struct bpf_map *map, void *key, void *value,
/* unknown flags */
return -EINVAL;

+ ret = bpf_map_trace_update_elem(map, key, value, map_flags);
+ if (unlikely(ret))
+ return ret;
+
WARN_ON_ONCE(!rcu_read_lock_held() && !rcu_read_lock_trace_held() &&
!rcu_read_lock_bh_held());

@@ -1201,6 +1210,10 @@ static int __htab_percpu_map_update_elem(struct bpf_map *map, void *key,
/* unknown flags */
return -EINVAL;

+ ret = bpf_map_trace_update_elem(map, key, value, map_flags);
+ if (unlikely(ret))
+ return ret;
+
WARN_ON_ONCE(!rcu_read_lock_held() && !rcu_read_lock_trace_held() &&
!rcu_read_lock_bh_held());

@@ -1256,6 +1269,10 @@ static int __htab_lru_percpu_map_update_elem(struct bpf_map *map, void *key,
/* unknown flags */
return -EINVAL;

+ ret = bpf_map_trace_update_elem(map, key, value, map_flags);
+ if (unlikely(ret))
+ return ret;
+
WARN_ON_ONCE(!rcu_read_lock_held() && !rcu_read_lock_trace_held() &&
!rcu_read_lock_bh_held());

@@ -1334,6 +1351,10 @@ static int htab_map_delete_elem(struct bpf_map *map, void *key)
WARN_ON_ONCE(!rcu_read_lock_held() && !rcu_read_lock_trace_held() &&
!rcu_read_lock_bh_held());

+ ret = bpf_map_trace_delete_elem(map, key);
+ if (unlikely(ret))
+ return ret;
+
key_size = map->key_size;

hash = htab_map_hash(key, key_size, htab->hashrnd);
@@ -1370,6 +1391,10 @@ static int htab_lru_map_delete_elem(struct bpf_map *map, void *key)
WARN_ON_ONCE(!rcu_read_lock_held() && !rcu_read_lock_trace_held() &&
!rcu_read_lock_bh_held());

+ ret = bpf_map_trace_delete_elem(map, key);
+ if (unlikely(ret))
+ return ret;
+
key_size = map->key_size;

hash = htab_map_hash(key, key_size, htab->hashrnd);
diff --git a/kernel/bpf/map_trace.c b/kernel/bpf/map_trace.c
new file mode 100644
index 000000000000..661b433f1451
--- /dev/null
+++ b/kernel/bpf/map_trace.c
@@ -0,0 +1,25 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/* Copyright (c) 2021 Google */
+#include "map_trace.h"
+
+noinline int bpf_map_trace_update_elem(struct bpf_map *map, void *key,
+ void *value, u64 map_flags)
+{
+ /* Attach point only: ignores its arguments and returns 0. The empty
+ * asm is a no-op side effect so that call sites are not optimized out.
+ */
+ asm("");
+ return 0;
+}
+ALLOW_ERROR_INJECTION(bpf_map_trace_update_elem, ERRNO);
+
+noinline int bpf_map_trace_delete_elem(struct bpf_map *map, void *key)
+{
+ /* Attach point only: ignores its arguments and returns 0. The empty
+ * asm is a no-op side effect so that call sites are not optimized out.
+ */
+ asm("");
+ return 0;
+}
+ALLOW_ERROR_INJECTION(bpf_map_trace_delete_elem, ERRNO);
+
diff --git a/kernel/bpf/map_trace.h b/kernel/bpf/map_trace.h
new file mode 100644
index 000000000000..12356a2e1f9f
--- /dev/null
+++ b/kernel/bpf/map_trace.h
@@ -0,0 +1,18 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/* Copyright (c) 2021 Google */
+#pragma once
+
+#include <linux/bpf.h>
+
+/*
+ * Map tracing hooks, called from some, but not all, bpf map types. The only
+ * guarantee is that a hook runs before the corresponding action
+ * (bpf_map_update_elem, etc.) takes effect; the action may still not occur.
+ * Call sites return any nonzero hook result without mutating the map, so an
+ * fmod_ret program may use these hooks to veto the corresponding helpers.
+ */
+noinline int bpf_map_trace_update_elem(struct bpf_map *map, void *key,
+ void *value, u64 map_flags);
+
+noinline int bpf_map_trace_delete_elem(struct bpf_map *map, void *key);
+
--
2.33.1.1089.g2158813163f-goog