Re: [PATCH v4 bpf 1/2] bpf: fix skb_do_redirect return values

From: Dan Carpenter
Date: Wed Jul 26 2023 - 09:39:25 EST


I'm not positive I understand the code in ip_finish_output2(). I think
instead of looking for LWTUNNEL_XMIT_DONE it should look for
!= LWTUNNEL_XMIT_CONTINUE. It's unfortunate that NET_XMIT_DROP and
LWTUNNEL_XMIT_CONTINUE are both 0x1. Why don't we just change that
instead?

Also there seems to be a leak in lwtunnel_xmit(). Should the early return
for an invalid encap type return LWTUNNEL_XMIT_CONTINUE, or should it call
kfree_skb() before returning?

Something like the following?

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 11652e464f5d..375790b672bc 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -112,6 +112,9 @@ void netdev_sw_irq_coalesce_default_on(struct net_device *dev);
#define NET_XMIT_CN 0x02 /* congestion notification */
#define NET_XMIT_MASK 0x0f /* qdisc flags in net/sch_generic.h */

+#define LWTUNNEL_XMIT_DONE NET_XMIT_SUCCESS
+#define LWTUNNEL_XMIT_CONTINUE 0x3
+
/* NET_XMIT_CN is special. It does not guarantee that this packet is lost. It
* indicates that the device will soon be dropping packets, or already drops
* some packets of the same priority; prompting us to send less aggressively. */
diff --git a/include/net/lwtunnel.h b/include/net/lwtunnel.h
index 6f15e6fa154e..8ab032ee04d0 100644
--- a/include/net/lwtunnel.h
+++ b/include/net/lwtunnel.h
@@ -16,12 +16,6 @@
#define LWTUNNEL_STATE_INPUT_REDIRECT BIT(1)
#define LWTUNNEL_STATE_XMIT_REDIRECT BIT(2)

-enum {
- LWTUNNEL_XMIT_DONE,
- LWTUNNEL_XMIT_CONTINUE,
-};
-
-
struct lwtunnel_state {
__u16 type;
__u16 flags;
diff --git a/net/core/lwtunnel.c b/net/core/lwtunnel.c
index 711cd3b4347a..732415d1287d 100644
--- a/net/core/lwtunnel.c
+++ b/net/core/lwtunnel.c
@@ -371,7 +371,7 @@ int lwtunnel_xmit(struct sk_buff *skb)

if (lwtstate->type == LWTUNNEL_ENCAP_NONE ||
lwtstate->type > LWTUNNEL_ENCAP_MAX)
- return 0;
+ return LWTUNNEL_XMIT_CONTINUE;

ret = -EOPNOTSUPP;
rcu_read_lock();
diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c
index 6e70839257f7..4be50a211b14 100644
--- a/net/ipv4/ip_output.c
+++ b/net/ipv4/ip_output.c
@@ -216,7 +216,7 @@ static int ip_finish_output2(struct net *net, struct sock *sk, struct sk_buff *s
if (lwtunnel_xmit_redirect(dst->lwtstate)) {
int res = lwtunnel_xmit(skb);

- if (res < 0 || res == LWTUNNEL_XMIT_DONE)
+ if (res != LWTUNNEL_XMIT_CONTINUE)
return res;
}

diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c
index 1e8c90e97608..016b0a513259 100644
--- a/net/ipv6/ip6_output.c
+++ b/net/ipv6/ip6_output.c
@@ -113,7 +113,7 @@ static int ip6_finish_output2(struct net *net, struct sock *sk, struct sk_buff *
if (lwtunnel_xmit_redirect(dst->lwtstate)) {
int res = lwtunnel_xmit(skb);

- if (res < 0 || res == LWTUNNEL_XMIT_DONE)
+ if (res != LWTUNNEL_XMIT_CONTINUE)
return res;
}