Re: [PATCH net-next] gro: avoid checking for a failed search

From: Eric Dumazet
Date: Wed Nov 02 2022 - 14:21:23 EST


On Wed, Nov 2, 2022 at 9:46 AM Richard Gobert <richardbgobert@xxxxxxxxx> wrote:
>
> > Why does it matter? You see a measurable perf win?
>
> In the common case, we will exit the loop with a break,
> so this patch eliminates an unnecessary check.
>
> On some architectures this optimization might be done
> automatically by the compiler, but I think it will be better
> to make it explicit here. Although on x86 this optimization
> happens automatically, I noticed that on my build target
> (ARM/GCC) this does change the binary.

What about taking this as an opportunity to reduce the indentation
level by one tab ?

Untested patch:

diff --git a/net/core/gro.c b/net/core/gro.c
index bc9451743307bc380cca96ae6995aa0a3b83d185..ddfe92c9a5e869d241931b72d6b3426a0e858468
100644
--- a/net/core/gro.c
+++ b/net/core/gro.c
@@ -491,43 +491,44 @@ static enum gro_result dev_gro_receive(struct
napi_struct *napi, struct sk_buff
list_for_each_entry_rcu(ptype, head, list) {
if (ptype->type != type || !ptype->callbacks.gro_receive)
continue;
+ goto found_ptype;
+ }
+ rcu_read_unlock();
+ goto normal;

- skb_set_network_header(skb, skb_gro_offset(skb));
- skb_reset_mac_len(skb);
- BUILD_BUG_ON(sizeof_field(struct napi_gro_cb, zeroed)
!= sizeof(u32));
- BUILD_BUG_ON(!IS_ALIGNED(offsetof(struct napi_gro_cb, zeroed),
- sizeof(u32))); /* Avoid slow
unaligned acc */
- *(u32 *)&NAPI_GRO_CB(skb)->zeroed = 0;
- NAPI_GRO_CB(skb)->flush = skb_has_frag_list(skb);
- NAPI_GRO_CB(skb)->is_atomic = 1;
- NAPI_GRO_CB(skb)->count = 1;
- if (unlikely(skb_is_gso(skb))) {
- NAPI_GRO_CB(skb)->count = skb_shinfo(skb)->gso_segs;
- /* Only support TCP at the moment. */
- if (!skb_is_gso_tcp(skb))
- NAPI_GRO_CB(skb)->flush = 1;
- }
-
- /* Setup for GRO checksum validation */
- switch (skb->ip_summed) {
- case CHECKSUM_COMPLETE:
- NAPI_GRO_CB(skb)->csum = skb->csum;
- NAPI_GRO_CB(skb)->csum_valid = 1;
- break;
- case CHECKSUM_UNNECESSARY:
- NAPI_GRO_CB(skb)->csum_cnt = skb->csum_level + 1;
- break;
- }
+found_ptype:
+ skb_set_network_header(skb, skb_gro_offset(skb));
+ skb_reset_mac_len(skb);
+ BUILD_BUG_ON(sizeof_field(struct napi_gro_cb, zeroed) != sizeof(u32));
+ BUILD_BUG_ON(!IS_ALIGNED(offsetof(struct napi_gro_cb, zeroed),
+ sizeof(u32))); /* Avoid slow unaligned acc */
+ *(u32 *)&NAPI_GRO_CB(skb)->zeroed = 0;
+ NAPI_GRO_CB(skb)->flush = skb_has_frag_list(skb);
+ NAPI_GRO_CB(skb)->is_atomic = 1;
+ NAPI_GRO_CB(skb)->count = 1;
+ if (unlikely(skb_is_gso(skb))) {
+ NAPI_GRO_CB(skb)->count = skb_shinfo(skb)->gso_segs;
+ /* Only support TCP at the moment. */
+ if (!skb_is_gso_tcp(skb))
+ NAPI_GRO_CB(skb)->flush = 1;
+ }

- pp = INDIRECT_CALL_INET(ptype->callbacks.gro_receive,
- ipv6_gro_receive, inet_gro_receive,
- &gro_list->list, skb);
+ /* Setup for GRO checksum validation */
+ switch (skb->ip_summed) {
+ case CHECKSUM_COMPLETE:
+ NAPI_GRO_CB(skb)->csum = skb->csum;
+ NAPI_GRO_CB(skb)->csum_valid = 1;
+ break;
+ case CHECKSUM_UNNECESSARY:
+ NAPI_GRO_CB(skb)->csum_cnt = skb->csum_level + 1;
break;
}
- rcu_read_unlock();

- if (&ptype->list == head)
- goto normal;
+ pp = INDIRECT_CALL_INET(ptype->callbacks.gro_receive,
+ ipv6_gro_receive, inet_gro_receive,
+ &gro_list->list, skb);
+
+ rcu_read_unlock();

if (PTR_ERR(pp) == -EINPROGRESS) {
ret = GRO_CONSUMED;