[PATCH net] ipmr: support IP_PKTINFO on cache report IGMP msg

From: Leone Fernando
Date: Wed Dec 13 2023 - 09:36:00 EST


In order to support IP_PKTINFO on cache report IGMP messages, we need
to call ipv4_pktinfo_prepare(), so introduce minor changes to this
function to support this flow.

When sending mrouted/pimd daemons a cache report IGMP message, it is
unnecessary to set dst on the newly created skb.
It used to be necessary on older versions, until
commit d826eb14ecef ("ipv4: PKTINFO doesnt need dst reference"), which
changed the way the IP_PKTINFO struct is retrieved.

Fixes: d826eb14ecef ("ipv4: PKTINFO doesnt need dst reference")
Signed-off-by: Leone Fernando <leone4fernando@xxxxxxxxx>
---
include/net/ip.h | 10 +++++++++-
net/ipv4/ip_sockglue.c | 25 ++++++++++++++++---------
net/ipv4/ipmr.c | 12 +++++-------
net/ipv4/raw.c | 2 +-
net/ipv4/udp.c | 2 +-
5 files changed, 32 insertions(+), 19 deletions(-)

diff --git a/include/net/ip.h b/include/net/ip.h
index b31be912489a..1b40b7386c56 100644
--- a/include/net/ip.h
+++ b/include/net/ip.h
@@ -767,7 +767,15 @@ int ip_options_rcv_srr(struct sk_buff *skb, struct net_device *dev);
* Functions provided by ip_sockglue.c
*/

-void ipv4_pktinfo_prepare(const struct sock *sk, struct sk_buff *skb);
+void ipv4_pktinfo_prepare(const struct sock *sk, struct sk_buff *iskb,
+ struct sk_buff *oskb);
+
+
+static inline void ipv4_pktinfo_input_prepare(const struct sock *sk, struct sk_buff *skb)
+{
+ ipv4_pktinfo_prepare(sk, skb, NULL);
+}
+
void ip_cmsg_recv_offset(struct msghdr *msg, struct sock *sk,
struct sk_buff *skb, int tlen, int offset);
int ip_cmsg_send(struct sock *sk, struct msghdr *msg,
diff --git a/net/ipv4/ip_sockglue.c b/net/ipv4/ip_sockglue.c
index d7d13940774e..fb26963e3869 100644
--- a/net/ipv4/ip_sockglue.c
+++ b/net/ipv4/ip_sockglue.c
@@ -1364,19 +1364,26 @@ int do_ip_setsockopt(struct sock *sk, int level, int optname,
/**
* ipv4_pktinfo_prepare - transfer some info from rtable to skb
* @sk: socket
- * @skb: buffer
+ * @iskb: input buffer
+ * @oskb: output buffer for the pktinfo, or NULL to use @iskb
*
* To support IP_CMSG_PKTINFO option, we store rt_iif and specific
* destination in skb->cb[] before dst drop.
* This way, receiver doesn't make cache line misses to read rtable.
*/
-void ipv4_pktinfo_prepare(const struct sock *sk, struct sk_buff *skb)
+void ipv4_pktinfo_prepare(const struct sock *sk, struct sk_buff *iskb,
+ struct sk_buff *oskb)
{
- struct in_pktinfo *pktinfo = PKTINFO_SKB_CB(skb);
+ struct in_pktinfo *pktinfo = PKTINFO_SKB_CB(iskb);
bool prepare = inet_test_bit(PKTINFO, sk) ||
ipv6_sk_rxinfo(sk);

- if (prepare && skb_rtable(skb)) {
+ if (oskb) {
+ memcpy(oskb->cb, iskb->cb, sizeof(iskb->cb));
+ pktinfo = PKTINFO_SKB_CB(oskb);
+ }
+
+ if (prepare && skb_rtable(iskb)) {
/* skb->cb is overloaded: prior to this point it is IP{6}CB
* which has interface index (iif) as the first member of the
* underlying inet{6}_skb_parm struct. This code then overlays
@@ -1386,20 +1393,20 @@ void ipv4_pktinfo_prepare(const struct sock *sk, struct sk_buff *skb)
* (e.g., process binds socket to eth0 for Tx which is
* redirected to loopback in the rtable/dst).
*/
- struct rtable *rt = skb_rtable(skb);
- bool l3slave = ipv4_l3mdev_skb(IPCB(skb)->flags);
+ struct rtable *rt = skb_rtable(iskb);
+ bool l3slave = ipv4_l3mdev_skb(IPCB(iskb)->flags);

if (pktinfo->ipi_ifindex == LOOPBACK_IFINDEX)
- pktinfo->ipi_ifindex = inet_iif(skb);
+ pktinfo->ipi_ifindex = inet_iif(iskb);
else if (l3slave && rt && rt->rt_iif)
pktinfo->ipi_ifindex = rt->rt_iif;

- pktinfo->ipi_spec_dst.s_addr = fib_compute_spec_dst(skb);
+ pktinfo->ipi_spec_dst.s_addr = fib_compute_spec_dst(iskb);
} else {
pktinfo->ipi_ifindex = 0;
pktinfo->ipi_spec_dst.s_addr = 0;
}
- skb_dst_drop(skb);
+ skb_dst_drop(iskb);
}

int ip_setsockopt(struct sock *sk, int level, int optname, sockptr_t optval,
diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c
index 9e222a57bc2b..6ed7c88743f9 100644
--- a/net/ipv4/ipmr.c
+++ b/net/ipv4/ipmr.c
@@ -1025,6 +1025,10 @@ static int ipmr_cache_report(const struct mr_table *mrt,
struct sk_buff *skb;
int ret;

+ mroute_sk = rcu_dereference(mrt->mroute_sk);
+ if (!mroute_sk)
+ return -EINVAL;
+
if (assert == IGMPMSG_WHOLEPKT || assert == IGMPMSG_WRVIFWHOLE)
skb = skb_realloc_headroom(pkt, sizeof(struct iphdr));
else
@@ -1069,7 +1073,7 @@ static int ipmr_cache_report(const struct mr_table *mrt,
msg = (struct igmpmsg *)skb_network_header(skb);
msg->im_vif = vifi;
msg->im_vif_hi = vifi >> 8;
- skb_dst_set(skb, dst_clone(skb_dst(pkt)));
+ ipv4_pktinfo_prepare(mroute_sk, pkt, skb);
/* Add our header */
igmp = skb_put(skb, sizeof(struct igmphdr));
igmp->type = assert;
@@ -1079,12 +1083,6 @@ static int ipmr_cache_report(const struct mr_table *mrt,
skb->transport_header = skb->network_header;
}

- mroute_sk = rcu_dereference(mrt->mroute_sk);
- if (!mroute_sk) {
- kfree_skb(skb);
- return -EINVAL;
- }
-
igmpmsg_netlink_event(mrt, skb);

/* Deliver to mrouted */
diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c
index 27da9d7294c0..cde60c8deed4 100644
--- a/net/ipv4/raw.c
+++ b/net/ipv4/raw.c
@@ -292,7 +292,7 @@ static int raw_rcv_skb(struct sock *sk, struct sk_buff *skb)

/* Charge it to the socket. */

- ipv4_pktinfo_prepare(sk, skb);
+ ipv4_pktinfo_input_prepare(sk, skb);
if (sock_queue_rcv_skb_reason(sk, skb, &reason) < 0) {
kfree_skb_reason(skb, reason);
return NET_RX_DROP;
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index 89e5a806b82e..3e5a418c96c3 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -2169,7 +2169,7 @@ static int udp_queue_rcv_one_skb(struct sock *sk, struct sk_buff *skb)

udp_csum_pull_header(skb);

- ipv4_pktinfo_prepare(sk, skb);
+ ipv4_pktinfo_input_prepare(sk, skb);
return __udp_queue_rcv_skb(sk, skb);

csum_error:
--
2.34.1