[lkp] [mm] 112b650f83e: -6.8% will-it-scale.per_process_ops

From: kernel test robot
Date: Wed Jul 29 2015 - 22:03:28 EST


FYI, we noticed the following changes on

git://git.kernel.org/pub/scm/linux/kernel/git/andrea/aa.git master
commit 112b650f83e5ccea260708f8b7ca747580584659 ("mm: gup: make get_user_pages_fast and __get_user_pages_fast latency conscious")
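
The commit title suggests the lockless fast-GUP walk is split into bounded
chunks with preemption/debug checks in between, so that interrupts stay
disabled only briefly. That reading matches the perf profile below: after the
patch, ___might_sleep is sampled inside get_user_pages_fast (0% -> 3.65% of
cycles), and get_user_pages_fast itself grows from 26.58% to 31.59%. The
sketch below shows the general shape of such a change; it is not the patch's
actual code, and GUP_CHUNK / gup_chunk_walk() are illustrative names:

#include <linux/kernel.h>
#include <linux/mm.h>
#include <linux/sched.h>

#define GUP_CHUNK 64	/* illustrative: pages walked per IRQ-off section */

/*
 * Hypothetical stand-in for the lockless page-table walk of one chunk;
 * returns how many pages it managed to pin without faulting.
 */
static int gup_chunk_walk(unsigned long start, int nr, struct page **pages);

static int gup_fast_sketch(unsigned long start, int nr_pages,
			   struct page **pages)
{
	int done = 0;

	might_sleep();		/* debug check: the ___might_sleep in the profile */
	while (done < nr_pages) {
		int chunk = min(nr_pages - done, GUP_CHUNK);
		unsigned long flags;
		int got;

		local_irq_save(flags);	/* IRQs off per chunk, not whole range */
		got = gup_chunk_walk(start + done * PAGE_SIZE, chunk,
				     pages + done);
		local_irq_restore(flags);

		done += got;
		if (got != chunk)
			break;	/* let the caller fall back to the slow path */

		cond_resched();	/* latency: preemption point between chunks */
	}
	return done;
}

For a caller that pins a single page per call, as get_futex_key() does on this
futex1 path, the extra per-call checks are pure overhead, which lines up with
the -6.8% will-it-scale.per_process_ops and the near-doubling of involuntary
context switches reported below.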


=========================================================================================
tbox_group/testcase/rootfs/kconfig/compiler/cpufreq_governor/test:
lkp-sbx04/will-it-scale/debian-x86_64-2015-02-07.cgz/x86_64-rhel/gcc-4.9/performance/futex1

commit:
b7c3d6a0d545317e63ff58a1e60059ce79ac359e
112b650f83e5ccea260708f8b7ca747580584659

b7c3d6a0d545317e 112b650f83e5ccea260708f8b7
---------------- --------------------------
         %stddev     %change         %stddev
             \          |                \
5155618 ± 0% -6.8% 4806190 ± 0% will-it-scale.per_process_ops
1203105 ± 1% -3.0% 1166790 ± 0% will-it-scale.per_thread_ops
18828 ± 11% +97.6% 37211 ± 5% will-it-scale.time.involuntary_context_switches
80265 ± 8% -20.5% 63818 ± 11% numa-numastat.node0.numa_hit
18828 ± 11% +97.6% 37211 ± 5% time.involuntary_context_switches
3600 ± 3% +20.6% 4341 ± 5% vmstat.system.cs
227062 ± 3% +52.6% 346589 ± 1% latency_stats.hits.pipe_wait.pipe_read.__vfs_read.vfs_read.SyS_read.entry_SYSCALL_64_fastpath
5878405 ± 0% +19.7% 7035430 ± 0% latency_stats.sum.pipe_wait.pipe_read.__vfs_read.vfs_read.SyS_read.entry_SYSCALL_64_fastpath
7597 ± 12% -36.0% 4860 ± 32% numa-meminfo.node0.AnonPages
5012 ± 50% +59.1% 7975 ± 31% numa-meminfo.node2.Mapped
12346 ± 14% +24.2% 15338 ± 3% numa-meminfo.node3.SReclaimable
3516 ± 3% +10.0% 3867 ± 3% slabinfo.kmalloc-2048.active_objs
20418 ± 3% -10.9% 18198 ± 5% slabinfo.kmalloc-256.active_objs
344.25 ± 6% -25.9% 255.00 ± 14% slabinfo.kmem_cache.active_objs
344.25 ± 6% -25.9% 255.00 ± 14% slabinfo.kmem_cache.num_objs
683.00 ± 4% -16.4% 571.00 ± 7% slabinfo.kmem_cache_node.active_objs
752.00 ± 3% -14.9% 640.00 ± 7% slabinfo.kmem_cache_node.num_objs
1899 ± 12% -36.0% 1214 ± 32% numa-vmstat.node0.nr_anon_pages
113103 ± 8% -11.7% 99882 ± 4% numa-vmstat.node0.numa_hit
78508 ± 13% -16.9% 65214 ± 8% numa-vmstat.node0.numa_local
95.75 ± 24% -56.7% 41.50 ± 41% numa-vmstat.node2.nr_dirtied
1252 ± 50% +59.1% 1993 ± 31% numa-vmstat.node2.nr_mapped
93.25 ± 24% -57.1% 40.00 ± 44% numa-vmstat.node2.nr_written
3086 ± 14% +24.2% 3834 ± 3% numa-vmstat.node3.nr_slab_reclaimable
2.26 ± 2% -36.9% 1.42 ± 2% perf-profile.cpu-cycles.___might_sleep.__might_sleep.get_futex_key.futex_wake.do_futex
0.41 ± 5% +140.2% 0.98 ± 1% perf-profile.cpu-cycles.___might_sleep.get_futex_key.futex_wake.do_futex.sys_futex
0.00 ± -1% +Inf% 3.65 ± 1% perf-profile.cpu-cycles.___might_sleep.get_user_pages_fast.get_futex_key.futex_wake.do_futex
3.21 ± 2% -31.6% 2.20 ± 1% perf-profile.cpu-cycles.__might_sleep.get_futex_key.futex_wake.do_futex.sys_futex
4.42 ± 1% -11.8% 3.90 ± 1% perf-profile.cpu-cycles.get_futex_key_refs.isra.10.futex_wake.do_futex.sys_futex.entry_SYSCALL_64_fastpath
26.58 ± 0% +18.8% 31.59 ± 0% perf-profile.cpu-cycles.get_user_pages_fast.get_futex_key.futex_wake.do_futex.sys_futex
14.18 ± 1% -9.6% 12.81 ± 0% perf-profile.cpu-cycles.unlock_page.get_futex_key.futex_wake.do_futex.sys_futex
193.00 ± 55% -60.1% 77.00 ± 87% sched_debug.cfs_rq[10]:/.blocked_load_avg
209.25 ± 51% -73.2% 56.00 ±107% sched_debug.cfs_rq[10]:/.tg_load_contrib
0.50 ±100% +350.0% 2.25 ± 72% sched_debug.cfs_rq[14]:/.nr_spread_over
65.00 ±154% -96.2% 2.50 ±100% sched_debug.cfs_rq[15]:/.blocked_load_avg
81.00 ±124% -79.9% 16.25 ± 13% sched_debug.cfs_rq[15]:/.tg_load_contrib
217.25 ± 96% -61.4% 83.75 ±167% sched_debug.cfs_rq[17]:/.blocked_load_avg
219.75 ± 95% -60.8% 86.25 ±161% sched_debug.cfs_rq[17]:/.tg_load_contrib
9485 ± 11% +16.1% 11016 ± 6% sched_debug.cfs_rq[19]:/.tg_load_avg
9453 ± 12% +16.5% 11015 ± 6% sched_debug.cfs_rq[20]:/.tg_load_avg
8076 ± 0% +13.7% 9178 ± 11% sched_debug.cfs_rq[21]:/.avg->runnable_avg_sum
9442 ± 11% +16.6% 11010 ± 6% sched_debug.cfs_rq[21]:/.tg_load_avg
175.00 ± 0% +13.4% 198.50 ± 11% sched_debug.cfs_rq[21]:/.tg_runnable_contrib
9432 ± 12% +16.7% 11010 ± 6% sched_debug.cfs_rq[22]:/.tg_load_avg
9473 ± 12% +15.9% 10977 ± 6% sched_debug.cfs_rq[23]:/.tg_load_avg
175.00 ± 2% +7.7% 188.50 ± 5% sched_debug.cfs_rq[27]:/.tg_runnable_contrib
35551 ± 12% -10.3% 31893 ± 1% sched_debug.cfs_rq[50]:/.exec_clock
21745 ± 6% +8.6% 23612 ± 6% sched_debug.cfs_rq[56]:/.exec_clock
73.75 ±100% +526.4% 462.00 ± 54% sched_debug.cfs_rq[63]:/.blocked_load_avg
84.25 ± 88% +460.8% 472.50 ± 52% sched_debug.cfs_rq[63]:/.tg_load_contrib
0.50 ±100% +550.0% 3.25 ± 25% sched_debug.cfs_rq[9]:/.nr_spread_over
10053 ± 27% +101.2% 20226 ± 14% sched_debug.cpu#0.nr_switches
19037 ± 14% +54.0% 29326 ± 10% sched_debug.cpu#0.sched_count
2136 ± 12% +236.9% 7198 ± 32% sched_debug.cpu#0.sched_goidle
15374 ± 36% -57.4% 6548 ± 67% sched_debug.cpu#1.nr_switches
15861 ± 35% -54.4% 7236 ± 60% sched_debug.cpu#1.sched_count
6837 ± 44% -55.2% 3060 ± 71% sched_debug.cpu#1.sched_goidle
2.00 ± 0% +50.0% 3.00 ± 0% sched_debug.cpu#17.cpu_load[2]
3269 ± 52% +182.5% 9233 ± 65% sched_debug.cpu#19.ttwu_count
1684 ± 13% -29.3% 1191 ± 15% sched_debug.cpu#23.ttwu_local
1275 ± 20% +203.1% 3865 ± 58% sched_debug.cpu#24.ttwu_local
-6.25 ±-54% -100.0% 0.00 ± 0% sched_debug.cpu#25.nr_uninterruptible
1.50 ±233% -650.0% -8.25 ±-97% sched_debug.cpu#30.nr_uninterruptible
-1.00 ±-187% -325.0% 2.25 ± 79% sched_debug.cpu#32.nr_uninterruptible
3.50 ± 47% -114.3% -0.50 ±-331% sched_debug.cpu#34.nr_uninterruptible
3.25 ± 39% -76.9% 0.75 ±145% sched_debug.cpu#37.nr_uninterruptible
236.50 ± 25% +597.6% 1649 ±131% sched_debug.cpu#39.sched_goidle
1.50 ±137% +316.7% 6.25 ± 45% sched_debug.cpu#41.nr_uninterruptible
984.50 ± 80% -63.9% 355.75 ± 40% sched_debug.cpu#42.ttwu_local
241.00 ± 7% +102.3% 487.50 ± 25% sched_debug.cpu#44.ttwu_local
522.50 ± 6% +385.2% 2535 ± 91% sched_debug.cpu#45.sched_goidle
1481 ± 17% +972.4% 15890 ±117% sched_debug.cpu#47.nr_switches
1566 ± 16% +925.2% 16056 ±117% sched_debug.cpu#47.sched_count
618.00 ± 19% +1162.9% 7804 ±119% sched_debug.cpu#47.sched_goidle
4222 ± 70% -61.6% 1619 ± 9% sched_debug.cpu#5.nr_switches
4705 ± 63% -52.6% 2229 ± 14% sched_debug.cpu#5.sched_count
1971 ± 76% -65.4% 682.00 ± 9% sched_debug.cpu#5.sched_goidle
1835 ± 36% +215.7% 5792 ± 36% sched_debug.cpu#53.nr_switches
1961 ± 37% +197.9% 5843 ± 36% sched_debug.cpu#53.sched_count
396.00 ± 19% +402.3% 1989 ± 71% sched_debug.cpu#53.ttwu_local
1803 ± 13% -32.6% 1215 ± 10% sched_debug.cpu#6.nr_switches
766.25 ± 13% -34.5% 502.25 ± 10% sched_debug.cpu#6.sched_goidle
395.50 ± 10% -52.3% 188.50 ± 15% sched_debug.cpu#6.ttwu_local
1105 ± 33% +788.4% 9823 ±107% sched_debug.cpu#60.sched_goidle


lkp-sbx04: Sandy Bridge-EX
Memory: 64G

will-it-scale.per_process_ops

[run-to-run trend: bisect-good (*) samples cluster near 5.15e+06-5.2e+06 ops;
 bisect-bad (O) samples cluster near 4.75e+06-4.9e+06 ops]

[*] bisect-good sample
[O] bisect-bad sample

To reproduce:

git clone git://git.kernel.org/pub/scm/linux/kernel/git/wfg/lkp-tests.git
cd lkp-tests
bin/lkp install job.yaml # job file is attached in this email
bin/lkp run job.yaml


Disclaimer:
Results have been estimated based on internal Intel analysis and are provided
for informational purposes only. Any difference in system hardware or software
design or configuration may affect actual performance.


Thanks,
Ying Huang
---
LKP_SERVER: inn
LKP_CGI_PORT: 80
LKP_CIFS_PORT: 139
testcase: will-it-scale
default-monitors:
  wait: activate-monitor
  kmsg:
  uptime:
  iostat:
  vmstat:
  numa-numastat:
  numa-vmstat:
  numa-meminfo:
  proc-vmstat:
  proc-stat:
    interval: 10
  meminfo:
  slabinfo:
  interrupts:
  lock_stat:
  latency_stats:
  softirqs:
  bdi_dev_mapping:
  diskstats:
  nfsstat:
  cpuidle:
  cpufreq-stats:
  turbostat:
  pmeter:
  sched_debug:
    interval: 60
cpufreq_governor: performance
default-watchdogs:
  oom-killer:
  watchdog:
commit: 752f49cf691de1a914be41c78111a0877af986ba
model: Sandy Bridge-EX
nr_cpu: 64
memory: 64G
nr_ssd_partitions: 4
ssd_partitions: "/dev/disk/by-id/ata-INTEL_SSDSC2CW240A3_CVCV20430*-part1"
swap_partitions:
category: benchmark
perf-profile:
  freq: 800
will-it-scale:
  test: futex1
queue: cyclic
testbox: lkp-sbx04
tbox_group: lkp-sbx04
kconfig: x86_64-rhel
enqueue_time: 2015-07-27 11:24:32.572062861 +08:00
user: lkp
compiler: gcc-4.9
head_commit: 752f49cf691de1a914be41c78111a0877af986ba
base_commit: cbfe8fa6cd672011c755c3cd85c9ffd4e2d10a6f
branch: linux-devel/devel-hourly-2015072717
kernel: "/pkg/linux/x86_64-rhel/gcc-4.9/752f49cf691de1a914be41c78111a0877af986ba/vmlinuz-4.2.0-rc4-wl-ath-02023-g752f49c"
rootfs: debian-x86_64-2015-02-07.cgz
result_root: "/result/will-it-scale/performance-futex1/lkp-sbx04/debian-x86_64-2015-02-07.cgz/x86_64-rhel/gcc-4.9/752f49cf691de1a914be41c78111a0877af986ba/0"
job_file: "/lkp/scheduled/lkp-sbx04/cyclic_will-it-scale-performance-futex1-x86_64-rhel-CYCLIC_HEAD-752f49cf691de1a914be41c78111a0877af986ba-20150727-107044-1mg92jq-0.yaml"
dequeue_time: 2015-07-27 18:08:44.809469793 +08:00
max_uptime: 1500
initrd: "/osimage/debian/debian-x86_64-2015-02-07.cgz"
bootloader_append:
- root=/dev/ram0
- user=lkp
- job=/lkp/scheduled/lkp-sbx04/cyclic_will-it-scale-performance-futex1-x86_64-rhel-CYCLIC_HEAD-752f49cf691de1a914be41c78111a0877af986ba-20150727-107044-1mg92jq-0.yaml
- ARCH=x86_64
- kconfig=x86_64-rhel
- branch=linux-devel/devel-hourly-2015072717
- commit=752f49cf691de1a914be41c78111a0877af986ba
- BOOT_IMAGE=/pkg/linux/x86_64-rhel/gcc-4.9/752f49cf691de1a914be41c78111a0877af986ba/vmlinuz-4.2.0-rc4-wl-ath-02023-g752f49c
- max_uptime=1500
- RESULT_ROOT=/result/will-it-scale/performance-futex1/lkp-sbx04/debian-x86_64-2015-02-07.cgz/x86_64-rhel/gcc-4.9/752f49cf691de1a914be41c78111a0877af986ba/0
- LKP_SERVER=inn
- |2-


  earlyprintk=ttyS0,115200 systemd.log_level=err
  debug apic=debug sysrq_always_enabled rcupdate.rcu_cpu_stall_timeout=100
  panic=-1 softlockup_panic=1 nmi_watchdog=panic oops=panic load_ramdisk=2 prompt_ramdisk=0
  console=ttyS0,115200 console=tty0 vga=normal

  rw
lkp_initrd: "/lkp/lkp/lkp-x86_64.cgz"
modules_initrd: "/pkg/linux/x86_64-rhel/gcc-4.9/752f49cf691de1a914be41c78111a0877af986ba/modules.cgz"
bm_initrd: "/osimage/deps/debian-x86_64-2015-02-07.cgz/lkp.cgz,/osimage/deps/debian-x86_64-2015-02-07.cgz/run-ipconfig.cgz,/osimage/deps/debian-x86_64-2015-02-07.cgz/turbostat.cgz,/lkp/benchmarks/turbostat.cgz,/lkp/benchmarks/will-it-scale.cgz"
job_state: finished
loadavg: 46.70 20.74 8.06 1/624 11372
start_time: '1437991880'
end_time: '1437992189'
version: "/lkp/lkp/.src-20150727-150448"
# Set the scaling governor of every CPU (0-63 on this box) to performance:
for g in /sys/devices/system/cpu/cpu[0-9]*/cpufreq/scaling_governor
do
	echo performance > "$g"
done
./runtest.py futex1 16 both 1 8 16 24 32 48 64