Re: [PATCH v2 bpf-next 07/13] bpf: Add BPF_FETCH field / create atomic_fetch_add instruction

From: Yonghong Song
Date: Sat Nov 28 2020 - 13:02:52 EST




On 11/27/20 9:57 AM, Brendan Jackman wrote:
This value can be set in bpf_insn.imm, for BPF_ATOMIC instructions,
in order to have the previous value of the atomically-modified memory
location loaded into the src register after an atomic op is carried
out.

Suggested-by: Yonghong Song <yhs@xxxxxx>
Signed-off-by: Brendan Jackman <jackmanb@xxxxxxxxxx>
---
arch/x86/net/bpf_jit_comp.c | 4 ++++
include/linux/filter.h | 9 +++++++++
include/uapi/linux/bpf.h | 3 +++
kernel/bpf/core.c | 13 +++++++++++++
kernel/bpf/disasm.c | 7 +++++++
kernel/bpf/verifier.c | 35 ++++++++++++++++++++++++----------
tools/include/linux/filter.h | 10 ++++++++++
tools/include/uapi/linux/bpf.h | 3 +++
8 files changed, 74 insertions(+), 10 deletions(-)

diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c
index 7c47ad70ddb4..d3cd45bcd0c1 100644
--- a/arch/x86/net/bpf_jit_comp.c
+++ b/arch/x86/net/bpf_jit_comp.c
@@ -827,6 +827,10 @@ static int emit_atomic(u8 **pprog, u8 atomic_op,
/* lock *(u32/u64*)(dst_reg + off) <op>= src_reg */
EMIT1(simple_alu_opcodes[atomic_op]);
break;
+ case BPF_ADD | BPF_FETCH:
+ /* src_reg = atomic_fetch_add(*(dst_reg + off), src_reg); */
+ EMIT2(0x0F, 0xC1);
+ break;
default:
pr_err("bpf_jit: unknown atomic opcode %02x\n", atomic_op);
return -EFAULT;
diff --git a/include/linux/filter.h b/include/linux/filter.h
index ce19988fb312..4e04d0fc454f 100644
--- a/include/linux/filter.h
+++ b/include/linux/filter.h
@@ -270,6 +270,15 @@ static inline bool insn_is_zext(const struct bpf_insn *insn)
.imm = BPF_ADD })
#define BPF_STX_XADD BPF_ATOMIC_ADD /* alias */
+/* Atomic memory add with fetch, src_reg = atomic_fetch_add(*(dst_reg + off), src_reg); */
+
+#define BPF_ATOMIC_FETCH_ADD(SIZE, DST, SRC, OFF) \
+ ((struct bpf_insn) { \
+ .code = BPF_STX | BPF_SIZE(SIZE) | BPF_ATOMIC, \
+ .dst_reg = DST, \
+ .src_reg = SRC, \
+ .off = OFF, \
+ .imm = BPF_ADD | BPF_FETCH })
/* Memory store, *(uint *) (dst_reg + off16) = imm32 */
diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index d0adc48db43c..025e377e7229 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -44,6 +44,9 @@
#define BPF_CALL 0x80 /* function call */
#define BPF_EXIT 0x90 /* function return */
+/* atomic op type fields (stored in immediate) */
+#define BPF_FETCH 0x01 /* fetch previous value into src reg */
+
/* Register numbers */
enum {
BPF_REG_0 = 0,
[...]
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index e8b41ccdfb90..cd4c03b25573 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -3602,7 +3602,11 @@ static int check_atomic(struct bpf_verifier_env *env, int insn_idx, struct bpf_i
{
int err;
- if (insn->imm != BPF_ADD) {
+ switch (insn->imm) {
+ case BPF_ADD:
+ case BPF_ADD | BPF_FETCH:
+ break;
+ default:
verbose(env, "BPF_ATOMIC uses invalid atomic opcode %02x\n", insn->imm);
return -EINVAL;
}
@@ -3631,7 +3635,7 @@ static int check_atomic(struct bpf_verifier_env *env, int insn_idx, struct bpf_i
is_pkt_reg(env, insn->dst_reg) ||
is_flow_key_reg(env, insn->dst_reg) ||
is_sk_reg(env, insn->dst_reg)) {
- verbose(env, "atomic stores into R%d %s is not allowed\n",
+ verbose(env, "BPF_ATOMIC stores into R%d %s is not allowed\n",
insn->dst_reg,
reg_type_str[reg_state(env, insn->dst_reg)->type]);
return -EACCES;
@@ -3644,8 +3648,20 @@ static int check_atomic(struct bpf_verifier_env *env, int insn_idx, struct bpf_i
return err;
/* check whether we can write into the same memory */
- return check_mem_access(env, insn_idx, insn->dst_reg, insn->off,
- BPF_SIZE(insn->code), BPF_WRITE, -1, true);
+ err = check_mem_access(env, insn_idx, insn->dst_reg, insn->off,
+ BPF_SIZE(insn->code), BPF_WRITE, -1, true);
+ if (err)
+ return err;
+
+ if (!(insn->imm & BPF_FETCH))
+ return 0;
+
+ /* check and record load of old value into src reg */
+ err = check_reg_arg(env, insn->src_reg, DST_OP);
+ if (err)
+ return err;
+
+ return 0;
}
static int __check_stack_boundary(struct bpf_verifier_env *env, u32 regno,
@@ -9501,12 +9517,6 @@ static int do_check(struct bpf_verifier_env *env)
} else if (class == BPF_STX) {
enum bpf_reg_type *prev_dst_type, dst_reg_type;
- if (((BPF_MODE(insn->code) != BPF_MEM &&
- BPF_MODE(insn->code) != BPF_ATOMIC) || insn->imm != 0)) {
- verbose(env, "BPF_STX uses reserved fields\n");
- return -EINVAL;
- }
-
if (BPF_MODE(insn->code) == BPF_ATOMIC) {
err = check_atomic(env, env->insn_idx, insn);
if (err)
@@ -9515,6 +9525,11 @@ static int do_check(struct bpf_verifier_env *env)
continue;
}
+ if (BPF_MODE(insn->code) != BPF_MEM && insn->imm != 0) {

"||" here instead of "&&"?

+ verbose(env, "BPF_STX uses reserved fields\n");
+ return -EINVAL;
+ }
+
/* check src1 operand */
err = check_reg_arg(env, insn->src_reg, SRC_OP);
if (err)
diff --git a/tools/include/linux/filter.h b/tools/include/linux/filter.h
[...]