[PATCH 08/10] bpf samples: Add utils.[ch] for using BPF

From: Wang Nan
Date: Thu Dec 17 2015 - 00:27:14 EST


We are going to uses libbpf to replace old libbpf.[ch] and
bpf_load.[ch]. This is the first patch of this work. In this patch,
several macros and helpers in libbpf.[ch] and bpf_load.[ch] are
merged into utils.[ch]. utils.[ch] utilizes libbpf in tools/lib to
deal with BPF related things. They would be compiled after Makefile
changes.

Signed-off-by: Wang Nan <wangnan0@xxxxxxxxxx>
Cc: Alexei Starovoitov <ast@xxxxxxxxxx>
Cc: Alex Gartrell <agartrell@xxxxxx>
Cc: Arnaldo Carvalho de Melo <acme@xxxxxxxxxx>
Cc: Brenden Blanco <bblanco@xxxxxxxxxxxx>
Cc: Daniel Borkmann <daniel@xxxxxxxxxxxxx>
Cc: Daniel Wagner <daniel.wagner@xxxxxxxxxxxx>
Cc: David S. Miller <davem@xxxxxxxxxxxxx>
Cc: Ingo Molnar <mingo@xxxxxxxxxx>
Cc: Kaixu Xia <xiakaixu@xxxxxxxxxx>
Cc: Michael Holzheu <holzheu@xxxxxxxxxxxxxxxxxx>
Cc: Yang Shi <yang.shi@xxxxxxxxxx>
---
samples/bpf/include/linux/err.h | 56 ++++++++
samples/bpf/utils.c | 276 ++++++++++++++++++++++++++++++++++++++++
samples/bpf/utils.h | 217 +++++++++++++++++++++++++++++++
3 files changed, 549 insertions(+)
create mode 100644 samples/bpf/include/linux/err.h
create mode 100644 samples/bpf/utils.c
create mode 100644 samples/bpf/utils.h

diff --git a/samples/bpf/include/linux/err.h b/samples/bpf/include/linux/err.h
new file mode 100644
index 0000000..671b874
--- /dev/null
+++ b/samples/bpf/include/linux/err.h
@@ -0,0 +1,56 @@
+#ifndef __TOOLS_LINUX_ERR_H
+#define __TOOLS_LINUX_ERR_H
+
+#include <asm/errno.h>
+
+#ifndef __must_check
+# define __must_check
+#endif
+#ifndef __force
+# define __force
+#endif
+#ifndef unlikely
+# define unlikely(x) x
+#endif
+
+/*
+ * Original kernel header comment:
+ *
+ * Kernel pointers have redundant information, so we can use a
+ * scheme where we can return either an error code or a normal
+ * pointer with the same return value.
+ *
+ * This should be a per-architecture thing, to allow different
+ * error and pointer decisions.
+ *
+ * Userspace note:
+ * The same principle works for userspace, because 'error' pointers
+ * fall down to the unused hole far from user space, as described
+ * in Documentation/x86/x86_64/mm.txt for x86_64 arch:
+ *
+ * 0000000000000000 - 00007fffffffffff (=47 bits) user space, different per mm hole caused by [48:63] sign extension
+ * ffffffffffe00000 - ffffffffffffffff (=2 MB) unused hole
+ *
+ * It should be the same case for other architectures, because
+ * this code is used in generic kernel code.
+ */
+#define MAX_ERRNO 4095
+
+#define IS_ERR_VALUE(x) unlikely((x) >= (unsigned long)-MAX_ERRNO)
+
+static inline void * __must_check ERR_PTR(long error_)
+{
+ return (void *) error_;
+}
+
+static inline long __must_check PTR_ERR(__force const void *ptr)
+{
+ return (long) ptr;
+}
+
+static inline bool __must_check IS_ERR(__force const void *ptr)
+{
+ return IS_ERR_VALUE((unsigned long)ptr);
+}
+
+#endif /* _LINUX_ERR_H */
diff --git a/samples/bpf/utils.c b/samples/bpf/utils.c
new file mode 100644
index 0000000..73262a9
--- /dev/null
+++ b/samples/bpf/utils.c
@@ -0,0 +1,276 @@
+/* eBPF mini library */
+#include <stdlib.h>
+#include <stdio.h>
+#include <ctype.h>
+#include <linux/unistd.h>
+#include <unistd.h>
+#include <fcntl.h>
+#include <sys/ioctl.h>
+#include <string.h>
+#include <linux/netlink.h>
+#include <linux/bpf.h>
+#include <errno.h>
+#include <net/ethernet.h>
+#include <net/if.h>
+#include <linux/if_packet.h>
+#include <arpa/inet.h>
+#include <linux/perf_event.h>
+#include "utils.h"
+
+#define DEBUGFS "/sys/kernel/debug/tracing/"
+
+int open_raw_sock(const char *name)
+{
+ struct sockaddr_ll sll;
+ int sock;
+
+ sock = socket(PF_PACKET, SOCK_RAW | SOCK_NONBLOCK | SOCK_CLOEXEC, htons(ETH_P_ALL));
+ if (sock < 0) {
+ printf("cannot create raw socket\n");
+ return -1;
+ }
+
+ memset(&sll, 0, sizeof(sll));
+ sll.sll_family = AF_PACKET;
+ sll.sll_ifindex = if_nametoindex(name);
+ sll.sll_protocol = htons(ETH_P_ALL);
+ if (bind(sock, (struct sockaddr *)&sll, sizeof(sll)) < 0) {
+ printf("bind to %s: %s\n", name, strerror(errno));
+ close(sock);
+ return -1;
+ }
+
+ return sock;
+}
+
+void read_trace_pipe(void)
+{
+ int trace_fd;
+
+ trace_fd = open(DEBUGFS "trace_pipe", O_RDONLY, 0);
+ if (trace_fd < 0)
+ return;
+
+ while (1) {
+ static char buf[4096];
+ ssize_t sz;
+
+ sz = read(trace_fd, buf, sizeof(buf));
+ if (sz > 0) {
+ buf[sz] = 0;
+ puts(buf);
+ }
+ }
+}
+
+int perf_event_open(struct perf_event_attr *attr, int pid, int cpu,
+ int group_fd, unsigned long flags)
+{
+ return syscall(__NR_perf_event_open, attr, pid, cpu,
+ group_fd, flags);
+}
+
+static int prog_load_prep(struct bpf_program *prog, int n,
+ struct bpf_insn *insns, int insns_cnt,
+ struct bpf_prog_prep_result *res)
+{
+ enum bpf_prog_type prog_type;
+ int is_socket, is_kprobe, is_kretprobe;
+ const char *event = bpf_program__title(prog, false);
+
+ LIBBPF_PTR_ASSERT(event, return -1);
+
+ is_socket = strncmp(event, "socket", 6) == 0;
+ is_kprobe = strncmp(event, "kprobe/", 7) == 0;
+ is_kretprobe = strncmp(event, "kretprobe/", 10) == 0;
+
+ if (is_socket) {
+ prog_type = BPF_PROG_TYPE_SOCKET_FILTER;
+ } else if (is_kprobe || is_kretprobe) {
+ prog_type = BPF_PROG_TYPE_KPROBE;
+ } else {
+ fprintf(stderr, "Unknown event '%s'\n", event);
+ return -1;
+ }
+
+ LIBBPF_ASSERT(bpf_program__set_type(prog, prog_type), return -1);
+ res->new_insn_ptr = insns;
+ res->new_insn_cnt = insns_cnt;
+ return 0;
+}
+
+static int populate_prog_array(int map_fd, struct bpf_object *obj)
+{
+ struct bpf_program *prog;
+
+ if (map_fd < 0) {
+ fprintf(stderr, "Invalid map fd\n");
+ return -1;
+ }
+
+ bpf_object__for_each_program(prog, obj) {
+ const char *event = bpf_program__title(prog, false);
+ int ind, prog_fd;
+ const char *ptr;
+
+ LIBBPF_PTR_ASSERT(event, return -1);
+ ptr = event + strlen(event) - 1;
+ while (isdigit(*ptr))
+ ptr--;
+ ptr++;
+ if (!isdigit(*ptr)) {
+ fprintf(stderr, "Invalid event: %s\n", event);
+ return -1;
+ }
+
+ ind = atoi(ptr);
+
+ __LIBBPF_ASSERT(prog_fd = bpf_program__nth_fd(prog, 0),
+ >= 0, return -1);
+ LIBBPF_ASSERT(bpf_map_update_elem(map_fd, &ind,
+ &prog_fd, BPF_ANY),
+ return -1);
+ }
+ return 0;
+}
+
+static int create_kprobes(int fd, const char *event, bool is_kprobe)
+{
+ char buf[256];
+ int efd, err, id;
+ struct perf_event_attr attr = {};
+
+ if (isdigit(event[0]))
+ return 0;
+
+ attr.type = PERF_TYPE_TRACEPOINT;
+ attr.sample_type = PERF_SAMPLE_RAW;
+ attr.sample_period = 1;
+ attr.wakeup_events = 1;
+
+ snprintf(buf, sizeof(buf),
+ "echo '%c:%s %s' >> /sys/kernel/debug/tracing/kprobe_events",
+ is_kprobe ? 'p' : 'r', event, event);
+
+ err = system(buf);
+ if (err < 0) {
+ fprintf(stderr, "failed to create kprobe '%s' error '%s'\n",
+ event, strerror(errno));
+ return -1;
+ }
+
+ strcpy(buf, DEBUGFS);
+ strcat(buf, "events/kprobes/");
+ strcat(buf, event);
+ strcat(buf, "/id");
+
+ efd = open(buf, O_RDONLY, 0);
+ if (efd < 0) {
+ fprintf(stderr, "failed to open event %s\n", event);
+ return -1;
+ }
+
+ err = read(efd, buf, sizeof(buf));
+ if (err < 0 || err >= sizeof(buf)) {
+ fprintf(stderr, "read from '%s' failed '%s'\n",
+ event, strerror(errno));
+ return -1;
+ }
+
+ close(efd);
+
+ buf[err] = 0;
+ id = atoi(buf);
+ attr.config = id;
+
+ efd = perf_event_open(&attr, -1/*pid*/, 0/*cpu*/, -1/*group_fd*/, 0);
+ if (efd < 0) {
+ fprintf(stderr, "event %d fd %d err %s\n", id, efd,
+ strerror(errno));
+ return -1;
+ }
+
+ ioctl(efd, PERF_EVENT_IOC_ENABLE, 0);
+ ioctl(efd, PERF_EVENT_IOC_SET_BPF, fd);
+ return 0;
+}
+
+struct bpf_object *load_bpf_file(char *path)
+{
+ struct bpf_program *prog;
+ struct bpf_object *obj;
+ struct bpf_map *map;
+ int err;
+
+ /* clear all kprobes */
+ err = system("echo \"\" > /sys/kernel/debug/tracing/kprobe_events");
+ if (err)
+ fprintf(stderr, "WARNING: clear kprobe_events failed: %s\n", strerror(errno));
+
+ LIBBPF_PTR_ASSERT(obj = bpf_object__open(path), return NULL);
+
+ bpf_object__for_each_program(prog, obj)
+ LIBBPF_ASSERT(bpf_program__set_prep(prog, 1, prog_load_prep),
+ goto errout);
+
+ LIBBPF_ASSERT(bpf_object__load(obj), goto errout);
+
+ bpf_map__for_each(map, obj) {
+ struct bpf_map_def def;
+
+ LIBBPF_ASSERT(bpf_map__get_def(map, &def), goto errout);
+ if (def.type == BPF_MAP_TYPE_PROG_ARRAY) {
+ if (populate_prog_array(bpf_map__get_fd(map), obj)) {
+ fprintf(stderr, "failed to populate program array\n");
+ goto errout;
+ }
+ }
+ }
+
+ bpf_object__for_each_program(prog, obj) {
+ const char *event = bpf_program__title(prog, false);
+ int fd, err;
+
+ LIBBPF_PTR_ASSERT(event, goto errout);
+ __LIBBPF_ASSERT(fd = bpf_program__nth_fd(prog, 0),
+ >= 0,
+ goto errout);
+
+ if (strncmp(event, "kprobe/", 7) == 0)
+ err = create_kprobes(fd, event + 7, true);
+ else if (strncmp(event, "kretprobe/", 10) == 0)
+ err = create_kprobes(fd, event + 10, false);
+
+ if (err) {
+ fprintf(stderr, "failed to create kprobes\n");
+ goto errout;
+ }
+ }
+
+ return obj;
+errout:
+ bpf_object__close(obj);
+ return NULL;
+}
+
+int get_prog_fd(struct bpf_object *obj, int idx)
+{
+ int i = 0;
+ struct bpf_program *prog;
+
+ bpf_object__for_each_program(prog, obj)
+ if (i++ == idx)
+ return bpf_program__nth_fd(prog, 0);
+ return -1;
+}
+
+int get_map_fd(struct bpf_object *obj, int idx)
+{
+ int i = 0;
+ struct bpf_map *map;
+
+ bpf_map__for_each(map, obj)
+ if (i++ == idx)
+ return bpf_map__get_fd(map);
+ return -1;
+}
diff --git a/samples/bpf/utils.h b/samples/bpf/utils.h
new file mode 100644
index 0000000..5962a68
--- /dev/null
+++ b/samples/bpf/utils.h
@@ -0,0 +1,217 @@
+#ifndef __SAMPELS_UTILS_H
+#define __SAMPELS_UTILS_H
+
+#include <bpf/libbpf.h>
+#include <bpf/bpf.h>
+
+/* ALU ops on registers, bpf_add|sub|...: dst_reg += src_reg */
+
+#define BPF_ALU64_REG(OP, DST, SRC) \
+ ((struct bpf_insn) { \
+ .code = BPF_ALU64 | BPF_OP(OP) | BPF_X, \
+ .dst_reg = DST, \
+ .src_reg = SRC, \
+ .off = 0, \
+ .imm = 0 })
+
+#define BPF_ALU32_REG(OP, DST, SRC) \
+ ((struct bpf_insn) { \
+ .code = BPF_ALU | BPF_OP(OP) | BPF_X, \
+ .dst_reg = DST, \
+ .src_reg = SRC, \
+ .off = 0, \
+ .imm = 0 })
+
+/* ALU ops on immediates, bpf_add|sub|...: dst_reg += imm32 */
+
+#define BPF_ALU64_IMM(OP, DST, IMM) \
+ ((struct bpf_insn) { \
+ .code = BPF_ALU64 | BPF_OP(OP) | BPF_K, \
+ .dst_reg = DST, \
+ .src_reg = 0, \
+ .off = 0, \
+ .imm = IMM })
+
+#define BPF_ALU32_IMM(OP, DST, IMM) \
+ ((struct bpf_insn) { \
+ .code = BPF_ALU | BPF_OP(OP) | BPF_K, \
+ .dst_reg = DST, \
+ .src_reg = 0, \
+ .off = 0, \
+ .imm = IMM })
+
+/* Short form of mov, dst_reg = src_reg */
+
+#define BPF_MOV64_REG(DST, SRC) \
+ ((struct bpf_insn) { \
+ .code = BPF_ALU64 | BPF_MOV | BPF_X, \
+ .dst_reg = DST, \
+ .src_reg = SRC, \
+ .off = 0, \
+ .imm = 0 })
+
+#define BPF_MOV32_REG(DST, SRC) \
+ ((struct bpf_insn) { \
+ .code = BPF_ALU | BPF_MOV | BPF_X, \
+ .dst_reg = DST, \
+ .src_reg = SRC, \
+ .off = 0, \
+ .imm = 0 })
+
+/* Short form of mov, dst_reg = imm32 */
+
+#define BPF_MOV64_IMM(DST, IMM) \
+ ((struct bpf_insn) { \
+ .code = BPF_ALU64 | BPF_MOV | BPF_K, \
+ .dst_reg = DST, \
+ .src_reg = 0, \
+ .off = 0, \
+ .imm = IMM })
+
+/* BPF_LD_IMM64 macro encodes single 'load 64-bit immediate' insn */
+#define BPF_LD_IMM64(DST, IMM) \
+ BPF_LD_IMM64_RAW(DST, 0, IMM)
+
+#define BPF_LD_IMM64_RAW(DST, SRC, IMM) \
+ ((struct bpf_insn) { \
+ .code = BPF_LD | BPF_DW | BPF_IMM, \
+ .dst_reg = DST, \
+ .src_reg = SRC, \
+ .off = 0, \
+ .imm = (__u32) (IMM) }), \
+ ((struct bpf_insn) { \
+ .code = 0, /* zero is reserved opcode */ \
+ .dst_reg = 0, \
+ .src_reg = 0, \
+ .off = 0, \
+ .imm = ((__u64) (IMM)) >> 32 })
+
+#ifndef BPF_PSEUDO_MAP_FD
+# define BPF_PSEUDO_MAP_FD 1
+#endif
+
+/* pseudo BPF_LD_IMM64 insn used to refer to process-local map_fd */
+#define BPF_LD_MAP_FD(DST, MAP_FD) \
+ BPF_LD_IMM64_RAW(DST, BPF_PSEUDO_MAP_FD, MAP_FD)
+
+
+/* Direct packet access, R0 = *(uint *) (skb->data + imm32) */
+
+#define BPF_LD_ABS(SIZE, IMM) \
+ ((struct bpf_insn) { \
+ .code = BPF_LD | BPF_SIZE(SIZE) | BPF_ABS, \
+ .dst_reg = 0, \
+ .src_reg = 0, \
+ .off = 0, \
+ .imm = IMM })
+
+/* Memory load, dst_reg = *(uint *) (src_reg + off16) */
+
+#define BPF_LDX_MEM(SIZE, DST, SRC, OFF) \
+ ((struct bpf_insn) { \
+ .code = BPF_LDX | BPF_SIZE(SIZE) | BPF_MEM, \
+ .dst_reg = DST, \
+ .src_reg = SRC, \
+ .off = OFF, \
+ .imm = 0 })
+
+/* Memory store, *(uint *) (dst_reg + off16) = src_reg */
+
+#define BPF_STX_MEM(SIZE, DST, SRC, OFF) \
+ ((struct bpf_insn) { \
+ .code = BPF_STX | BPF_SIZE(SIZE) | BPF_MEM, \
+ .dst_reg = DST, \
+ .src_reg = SRC, \
+ .off = OFF, \
+ .imm = 0 })
+
+/* Memory store, *(uint *) (dst_reg + off16) = imm32 */
+
+#define BPF_ST_MEM(SIZE, DST, OFF, IMM) \
+ ((struct bpf_insn) { \
+ .code = BPF_ST | BPF_SIZE(SIZE) | BPF_MEM, \
+ .dst_reg = DST, \
+ .src_reg = 0, \
+ .off = OFF, \
+ .imm = IMM })
+
+/* Conditional jumps against registers, if (dst_reg 'op' src_reg) goto pc + off16 */
+
+#define BPF_JMP_REG(OP, DST, SRC, OFF) \
+ ((struct bpf_insn) { \
+ .code = BPF_JMP | BPF_OP(OP) | BPF_X, \
+ .dst_reg = DST, \
+ .src_reg = SRC, \
+ .off = OFF, \
+ .imm = 0 })
+
+/* Conditional jumps against immediates, if (dst_reg 'op' imm32) goto pc + off16 */
+
+#define BPF_JMP_IMM(OP, DST, IMM, OFF) \
+ ((struct bpf_insn) { \
+ .code = BPF_JMP | BPF_OP(OP) | BPF_K, \
+ .dst_reg = DST, \
+ .src_reg = 0, \
+ .off = OFF, \
+ .imm = IMM })
+
+/* Raw code statement block */
+
+#define BPF_RAW_INSN(CODE, DST, SRC, OFF, IMM) \
+ ((struct bpf_insn) { \
+ .code = CODE, \
+ .dst_reg = DST, \
+ .src_reg = SRC, \
+ .off = OFF, \
+ .imm = IMM })
+
+/* Program exit */
+
+#define BPF_EXIT_INSN() \
+ ((struct bpf_insn) { \
+ .code = BPF_JMP | BPF_EXIT, \
+ .dst_reg = 0, \
+ .src_reg = 0, \
+ .off = 0, \
+ .imm = 0 })
+
+#define __LIBBPF_ASSERT(stat, cond, ret) do { \
+ char ___errbuf[256]; \
+ int ___err = stat; \
+ \
+ if ((___err) cond) \
+ break; \
+ libbpf_strerror(___err, ___errbuf, sizeof(___errbuf));\
+ fprintf(stderr, "libbpf error: %s\n", ___errbuf);\
+ ret; \
+} while(0)
+
+#define __LIBBPF_PTR_ASSERT(stat, cond, ret) do { \
+ const void *___ptr = stat; \
+ \
+ if (!IS_ERR(___ptr) && ___ptr) \
+ break; \
+ if (!___ptr) \
+ ___ptr = ERR_PTR(-EEXIST); \
+ LIBBPF_ASSERT(PTR_ERR(___ptr), ret); \
+} while(0)
+
+#define LIBBPF_ASSERT(stat, ret) __LIBBPF_ASSERT(stat, == 0, ret)
+#define LIBBPF_PTR_ASSERT(stat, ret) __LIBBPF_PTR_ASSERT(stat, == 0, ret)
+
+/* create RAW socket and bind to interface 'name' */
+int open_raw_sock(const char *name);
+void read_trace_pipe(void);
+
+struct perf_event_attr;
+int perf_event_open(struct perf_event_attr *attr, int pid, int cpu,
+ int group_fd, unsigned long flags);
+
+int prog_load_prepare(struct bpf_program *prog, int n,
+ struct bpf_insn *insns, int insns_cnt,
+ struct bpf_prog_prep_result *res);
+
+struct bpf_object *load_bpf_file(char *path);
+int get_prog_fd(struct bpf_object *obj, int idx);
+int get_map_fd(struct bpf_object *obj, int idx);
+#endif
--
1.8.3.4

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/