[PATCH net-next 4/4] bonding: balance ICMP echoes in layer3+4 mode

From: Matteo Croce
Date: Mon Oct 21 2019 - 16:11:45 EST


The bonding uses the L4 ports to balance flows between slaves.
As the ICMP protocol has no ports, those packets are sent all to the
same device:

# tcpdump -qltnni veth0 ip |sed 's/^/0: /' &
# tcpdump -qltnni veth1 ip |sed 's/^/1: /' &
# ping -qc1 192.168.0.2
1: IP 192.168.0.1 > 192.168.0.2: ICMP echo request, id 315, seq 1, length 64
1: IP 192.168.0.2 > 192.168.0.1: ICMP echo reply, id 315, seq 1, length 64
# ping -qc1 192.168.0.2
1: IP 192.168.0.1 > 192.168.0.2: ICMP echo request, id 316, seq 1, length 64
1: IP 192.168.0.2 > 192.168.0.1: ICMP echo reply, id 316, seq 1, length 64
# ping -qc1 192.168.0.2
1: IP 192.168.0.1 > 192.168.0.2: ICMP echo request, id 317, seq 1, length 64
1: IP 192.168.0.2 > 192.168.0.1: ICMP echo reply, id 317, seq 1, length 64

But some ICMP packets have an Identifier field which is
used to match packets within sessions, let's use this value in the hash
function to balance these packets between bond slaves:

# ping -qc1 192.168.0.2
0: IP 192.168.0.1 > 192.168.0.2: ICMP echo request, id 303, seq 1, length 64
0: IP 192.168.0.2 > 192.168.0.1: ICMP echo reply, id 303, seq 1, length 64
# ping -qc1 192.168.0.2
1: IP 192.168.0.1 > 192.168.0.2: ICMP echo request, id 304, seq 1, length 64
1: IP 192.168.0.2 > 192.168.0.1: ICMP echo reply, id 304, seq 1, length 64

Signed-off-by: Matteo Croce <mcroce@xxxxxxxxxx>
---
drivers/net/bonding/bond_main.c | 22 +++++++++++++++++-----
1 file changed, 17 insertions(+), 5 deletions(-)

diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c
index 21d8fcc83c9c..83afb03f4d07 100644
--- a/drivers/net/bonding/bond_main.c
+++ b/drivers/net/bonding/bond_main.c
@@ -3267,6 +3267,8 @@ static bool bond_flow_dissect(struct bonding *bond, struct sk_buff *skb,
return skb_flow_dissect_flow_keys(skb, fk, 0);

fk->ports.ports = 0;
+ fk->icmp.icmp = 0;
+ fk->icmp.id = 0;
noff = skb_network_offset(skb);
if (skb->protocol == htons(ETH_P_IP)) {
if (unlikely(!pskb_may_pull(skb, noff + sizeof(*iph))))
@@ -3286,8 +3288,14 @@ static bool bond_flow_dissect(struct bonding *bond, struct sk_buff *skb,
} else {
return false;
}
- if (bond->params.xmit_policy == BOND_XMIT_POLICY_LAYER34 && proto >= 0)
- fk->ports.ports = skb_flow_get_ports(skb, noff, proto);
+ if (bond->params.xmit_policy == BOND_XMIT_POLICY_LAYER34 && proto >= 0) {
+ if (proto == IPPROTO_ICMP || proto == IPPROTO_ICMPV6)
+ skb_flow_get_icmp_tci(skb, &fk->icmp, skb->data,
+ skb_transport_offset(skb),
+ skb_headlen(skb));
+ else
+ fk->ports.ports = skb_flow_get_ports(skb, noff, proto);
+ }

return true;
}
@@ -3314,10 +3322,14 @@ u32 bond_xmit_hash(struct bonding *bond, struct sk_buff *skb)
return bond_eth_hash(skb);

if (bond->params.xmit_policy == BOND_XMIT_POLICY_LAYER23 ||
- bond->params.xmit_policy == BOND_XMIT_POLICY_ENCAP23)
+ bond->params.xmit_policy == BOND_XMIT_POLICY_ENCAP23) {
hash = bond_eth_hash(skb);
- else
- hash = (__force u32)flow.ports.ports;
+ } else {
+ if (flow.icmp.id)
+ memcpy(&hash, &flow.icmp, sizeof(hash));
+ else
+ memcpy(&hash, &flow.ports.ports, sizeof(hash));
+ }
hash ^= (__force u32)flow_get_u32_dst(&flow) ^
(__force u32)flow_get_u32_src(&flow);
hash ^= (hash >> 16);
--
2.21.0