Re: [PATCH] x86: Add optimized popcnt variants

From: H. Peter Anvin
Date: Fri Feb 19 2010 - 11:11:04 EST


On 02/19/2010 06:22 AM, Borislav Petkov wrote:
> --- /dev/null
> +++ b/arch/x86/lib/hweight.c
> @@ -0,0 +1,62 @@
> +#include <linux/kernel.h>
> +#include <linux/module.h>
> +#include <linux/bitops.h>
> +
> +#ifdef CONFIG_64BIT
> +/* popcnt %rdi, %rax */
> +#define POPCNT ".byte 0xf3\n\t.byte 0x48\n\t.byte 0x0f\n\t.byte 0xb8\n\t.byte 0xc7"
> +#define REG_IN "D"
> +#define REG_OUT "a"
> +#else
> +/* popcnt %eax, %eax */
> +#define POPCNT ".byte 0xf3\n\t.byte 0x0f\n\t.byte 0xb8\n\t.byte 0xc0"
> +#define REG_IN "a"
> +#define REG_OUT "a"
> +#endif
> +
> +/*
> + * __sw_hweightXX are called from within the alternatives below
> + * and callee-clobbered registers need to be taken care of. See
> + * ARCH_HWEIGHT_CFLAGS in <arch/x86/Kconfig> for the respective
> + * compiler switches.
> + */
> +unsigned int __arch_hweight32(unsigned int w)
> +{
> + unsigned int res = 0;
> +
> + asm (ALTERNATIVE("call __sw_hweight32", POPCNT, X86_FEATURE_POPCNT)
> + : "="REG_OUT (res)
> + : REG_IN (w));
> +
> + return res;
> +}
> +EXPORT_SYMBOL(__arch_hweight32);
> +
> +unsigned int __arch_hweight16(unsigned int w)
> +{
> + return __arch_hweight32(w & 0xffff);
> +}
> +EXPORT_SYMBOL(__arch_hweight16);
> +
> +unsigned int __arch_hweight8(unsigned int w)
> +{
> + return __arch_hweight32(w & 0xff);
> +}
> +EXPORT_SYMBOL(__arch_hweight8);
> +
> +unsigned long __arch_hweight64(__u64 w)
> +{
> + unsigned long res = 0;
> +
> +#ifdef CONFIG_X86_32
> + return __arch_hweight32((u32)w) +
> + __arch_hweight32((u32)(w >> 32));
> +#else
> + asm (ALTERNATIVE("call __sw_hweight64", POPCNT, X86_FEATURE_POPCNT)
> + : "="REG_OUT (res)
> + : REG_IN (w));
> +#endif /* CONFIG_X86_32 */
> +
> + return res;
> +}

You're still not inlining these. They should be: there is absolutely no
code-size reason not to inline them anymore.

> diff --git a/include/asm-generic/bitops/arch_hweight.h b/include/asm-generic/bitops/arch_hweight.h
> index 3a7be84..1c82306 100644
> --- a/include/asm-generic/bitops/arch_hweight.h
> +++ b/include/asm-generic/bitops/arch_hweight.h
> @@ -3,9 +3,23 @@
>
> #include <asm/types.h>
>
> -extern unsigned int __arch_hweight32(unsigned int w);
> -extern unsigned int __arch_hweight16(unsigned int w);
> -extern unsigned int __arch_hweight8(unsigned int w);
> -extern unsigned long __arch_hweight64(__u64 w);
> +unsigned int __arch_hweight32(unsigned int w)
> +{
> + return __sw_hweight32(w);
> +}
>
> +unsigned int __arch_hweight16(unsigned int w)
> +{
> + return __sw_hweight16(w);
> +}
> +
> +unsigned int __arch_hweight8(unsigned int w)
> +{
> + return __sw_hweight8(w);
> +}
> +
> +unsigned long __arch_hweight64(__u64 w)
> +{
> + return __sw_hweight64(w);
> +}
> #endif /* _ASM_GENERIC_BITOPS_HWEIGHT_H_ */

These are in a header file, so they *definitely* should be inlines.

-hpa
--
H. Peter Anvin, Intel Open Source Technology Center
I work for Intel. I don't speak on their behalf.

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/