RE: [PATCH] slab.h: Avoid using & for logical and of booleans

From: David Laight
Date: Tue Nov 06 2018 - 06:07:52 EST


From: Vlastimil Babka [mailto:vbabka@xxxxxxx]
> Sent: 06 November 2018 10:22
...
> >> - return type_dma + (is_reclaimable & !is_dma) * KMALLOC_RECLAIM;
> >> + return type_dma + is_reclaimable * !is_dma * KMALLOC_RECLAIM;
> >
> > ISTM that changing is_dma and is_reclaimable from int to bool will stop the bleating.
> >
> > It is also strange that this code is trying so hard here to avoid conditional instructions

I've done some experiments, compiled with gcc 4.7.3 and -O2
The constants match those from the kernel headers.

It is noticable that there isn't a cmov in sight.
The code would also be better if the KMALLOC constants matched the GFP ones.


#define __GFP_DMA 1u
#define __GFP_RECLAIM 0x10u

#define KMALLOC_DMA 2
#define KMALLOC_RECLAIM 1

unsigned int f(unsigned int flags)
{
return flags & __GFP_DMA ? KMALLOC_DMA : flags & __GFP_RECLAIM ? KMALLOC_RECLAIM : 0;
}

unsigned int f1(unsigned int flags)
{
return !__builtin_expect(flags & (__GFP_DMA | __GFP_RECLAIM), 0) ? 0 : flags & __GFP_DMA ? KMALLOC_DMA : KMALLOC_RECLAIM;
}

unsigned int f2(unsigned int flags)
{
int is_dma, type_dma, is_rec;

is_dma = !!(flags & __GFP_DMA);
type_dma = is_dma * KMALLOC_DMA;
is_rec = !!(flags & __GFP_RECLAIM);

return type_dma + (is_rec & !is_dma) * KMALLOC_RECLAIM;
}

unsigned int f3(unsigned int flags)
{
int is_dma, type_dma, is_rec;

is_dma = !!(flags & __GFP_DMA);
type_dma = is_dma * KMALLOC_DMA;
is_rec = !!(flags & __GFP_RECLAIM);

return type_dma + (is_rec * !is_dma) * KMALLOC_RECLAIM;
}

Disassembly of section .text:

0000000000000000 <f>:
0: 40 f6 c7 01 test $0x1,%dil
4: b8 02 00 00 00 mov $0x2,%eax
9: 74 05 je 10 <f+0x10>
b: f3 c3 repz retq
d: 0f 1f 00 nopl (%rax)
10: 89 f8 mov %edi,%eax
12: c1 e8 04 shr $0x4,%eax
15: 83 e0 01 and $0x1,%eax
18: c3 retq

This one has a misprediced branch in the common path.

0000000000000020 <f1>:
20: 40 f6 c7 11 test $0x11,%dil
24: 75 03 jne 29 <f1+0x9>
26: 31 c0 xor %eax,%eax
28: c3 retq
29: 83 e7 01 and $0x1,%edi
2c: 83 ff 01 cmp $0x1,%edi
2f: 19 c0 sbb %eax,%eax
31: 83 c0 02 add $0x2,%eax
34: c3 retq

The jne will be predicted not taken and the retq predicted.
So this might only be 1 clock in the normal case.

0000000000000040 <f2>:
40: 89 f8 mov %edi,%eax
42: c1 ef 04 shr $0x4,%edi
45: 83 e0 01 and $0x1,%eax
48: 89 c2 mov %eax,%edx
4a: 83 f2 01 xor $0x1,%edx
4d: 21 d7 and %edx,%edi
4f: 8d 04 47 lea (%rdi,%rax,2),%eax
52: c3 retq

No conditionals, but a 4 deep dependency chain.

0000000000000060 <f3>:
60: 89 fa mov %edi,%edx
62: c1 ef 04 shr $0x4,%edi
65: 83 e2 01 and $0x1,%edx
68: 83 e7 01 and $0x1,%edi
6b: 89 d0 mov %edx,%eax
6d: 83 f0 01 xor $0x1,%eax
70: 0f af c7 imul %edi,%eax
73: 8d 04 50 lea (%rax,%rdx,2),%eax
76: c3 retq

You really don't want the multiply.

David

-
Registered Address Lakeside, Bramley Road, Mount Farm, Milton Keynes, MK1 1PT, UK
Registration No: 1397386 (Wales)