[LKP] [futex] 76835b0ebf8: -8.1% will-it-scale.per_thread_ops

From: Huang Ying
Date: Thu Feb 26 2015 - 00:17:06 EST


FYI, we noticed the following changes on

git://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git master
commit 76835b0ebf8a7fe85beb03c75121419a7dec52f0 ("futex: Ensure get_futex_key_refs() always implies a barrier")
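
For context, the blamed commit adds an explicit smp_mb() to the default
case of get_futex_key_refs(), which private futexes previously fell
through without any barrier. A sketch of the function after the change,
paraphrased from the upstream commit (details and comments approximate):

static void get_futex_key_refs(union futex_key *key)
{
	if (!key->both.ptr)
		return;

	switch (key->both.offset & (FUT_OFF_INODE|FUT_OFF_MMSHARED)) {
	case FUT_OFF_INODE:
		ihold(key->shared.inode); /* implies smp_mb(); (B) */
		break;
	case FUT_OFF_MMSHARED:
		futex_get_mm(key); /* implies smp_mb(); (B) */
		break;
	default:
		/*
		 * Private futexes used to fall through here with no
		 * barrier at all; this new full barrier is the likely
		 * source of the get_futex_key_refs cycle increases in
		 * the profiles below.
		 */
		smp_mb(); /* explicit smp_mb(); (B) */
	}
}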


testbox/testcase/testparams: lkp-wsx01/will-it-scale/performance-futex4

0429fbc0bdc297d6 76835b0ebf8a7fe85beb03c751
---------------- --------------------------
         %stddev     %change         %stddev
             \          |                \
6314259 ± 0% -8.1% 5800079 ± 0% will-it-scale.per_thread_ops
6274871 ± 0% -8.1% 5768747 ± 0% will-it-scale.per_process_ops
0.64 ± 0% +4.6% 0.67 ± 1% will-it-scale.scalability
0.79 ± 2% +716.1% 6.48 ± 1% perf-profile.cpu-cycles.get_futex_key_refs.isra.11.futex_wait_setup.futex_wait.do_futex.sys_futex
2 ± 44% +200.0% 6 ± 21% sched_debug.cpu#79.nr_uninterruptible
1320 ± 49% -64.8% 464 ± 15% sched_debug.cpu#61.ttwu_count
167 ± 21% -45.9% 90 ± 49% sched_debug.cfs_rq[61]:/.blocked_load_avg
7 ± 18% +48.6% 10 ± 28% sched_debug.cfs_rq[25]:/.load
7 ± 18% +60.0% 11 ± 34% sched_debug.cpu#25.load
175 ± 20% -44.3% 97 ± 47% sched_debug.cfs_rq[61]:/.tg_load_contrib
2406 ± 49% -58.3% 1003 ± 25% sched_debug.cpu#61.nr_switches
2417 ± 49% -58.1% 1014 ± 25% sched_debug.cpu#61.sched_count
613 ± 19% -34.6% 401 ± 25% sched_debug.cpu#61.sched_goidle
4.56 ± 1% +37.4% 6.26 ± 2% perf-profile.cpu-cycles.get_futex_key.futex_wait_setup.futex_wait.do_futex.sys_futex
85583 ± 9% -14.8% 72913 ± 7% sched_debug.cpu#0.nr_load_updates
29.29 ± 0% +19.2% 34.90 ± 2% perf-profile.cpu-cycles.futex_wait_setup.futex_wait.do_futex.sys_futex.system_call_fastpath
1.05 ± 3% -10.6% 0.94 ± 1% perf-profile.cpu-cycles.testcase
2.43 ± 2% -10.4% 2.18 ± 0% perf-profile.cpu-cycles.sysret_check.syscall
84405 ± 7% -11.0% 75139 ± 7% sched_debug.cfs_rq[0]:/.exec_clock
1.07 ± 2% -14.7% 0.91 ± 2% perf-profile.cpu-cycles._raw_spin_unlock.futex_wait_setup.futex_wait.do_futex.sys_futex
5.91 ± 0% -10.2% 5.31 ± 2% perf-profile.cpu-cycles._raw_spin_lock.futex_wait_setup.futex_wait.do_futex.sys_futex
66640 ± 5% +5.7% 70433 ± 6% sched_debug.cpu#10.nr_load_updates
4274 ± 3% -12.0% 3762 ± 7% sched_debug.cpu#21.curr->pid

testbox/testcase/testparams: wsm/will-it-scale/performance-futex3

0429fbc0bdc297d6 76835b0ebf8a7fe85beb03c751
---------------- --------------------------
11676004 ± 0% -10.3% 10473333 ± 0% will-it-scale.per_thread_ops
11515138 ± 0% -8.8% 10501984 ± 0% will-it-scale.per_process_ops
0.69 ± 3% +8.2% 0.75 ± 1% will-it-scale.scalability
1.76 ± 4% +364.0% 8.18 ± 0% perf-profile.cpu-cycles.get_futex_key_refs.isra.11.futex_wake.do_futex.sys_futex.system_call_fastpath
76838319 ± 12% +24.4% 95586476 ± 5% cpuidle.POLL.time
163113 ± 44% +89.7% 309491 ± 14% sched_debug.cfs_rq[6]:/.spread0
16.31 ± 1% +40.2% 22.86 ± 0% perf-profile.cpu-cycles.futex_wake.do_futex.sys_futex.system_call_fastpath.syscall
89 ± 17% -26.8% 65 ± 24% sched_debug.cfs_rq[2]:/.load
88 ± 19% -24.1% 66 ± 23% sched_debug.cpu#2.load
100 ± 11% +20.3% 121 ± 13% sched_debug.cpu#6.load
87 ± 10% -24.6% 66 ± 10% sched_debug.cfs_rq[1]:/.load
787 ± 13% -22.1% 613 ± 9% sched_debug.cfs_rq[4]:/.blocked_load_avg
7.05 ± 0% +12.0% 7.89 ± 1% perf-profile.cpu-cycles.get_futex_key.futex_wake.do_futex.sys_futex.system_call_fastpath
2132 ± 11% +21.8% 2597 ± 12% cpuidle.C1-NHM.usage
77 ± 9% -15.4% 65 ± 10% sched_debug.cfs_rq[1]:/.runnable_load_avg
100 ± 13% +17.4% 118 ± 10% sched_debug.cpu#6.cpu_load[1]
101 ± 14% +17.8% 119 ± 10% sched_debug.cpu#6.cpu_load[2]
85 ± 10% -19.7% 68 ± 8% sched_debug.cpu#1.load
38.14 ± 0% +13.0% 43.08 ± 0% perf-profile.cpu-cycles.do_futex.sys_futex.system_call_fastpath.syscall
272.17 ± 0% -9.3% 246.76 ± 0% time.user_time
3.24 ± 4% -12.3% 2.84 ± 2% perf-profile.cpu-cycles.testcase
43.30 ± 0% +10.3% 47.76 ± 0% perf-profile.cpu-cycles.sys_futex.system_call_fastpath.syscall
3152 ± 6% -12.5% 2758 ± 8% sched_debug.cpu#2.curr->pid
74 ± 4% -13.5% 64 ± 7% sched_debug.cpu#1.cpu_load[0]
11.00 ± 2% -10.8% 9.81 ± 1% perf-profile.cpu-cycles.system_call_after_swapgs.syscall
10.10 ± 1% -14.3% 8.66 ± 1% perf-profile.cpu-cycles.system_call.syscall
729331 ± 4% +11.8% 815471 ± 4% sched_debug.cfs_rq[6]:/.min_vruntime
110881 ± 10% +13.5% 125833 ± 3% sched_debug.cfs_rq[6]:/.exec_clock
3.26 ± 0% -12.7% 2.85 ± 1% perf-profile.cpu-cycles.sysret_check.syscall
112231 ± 10% +13.1% 126884 ± 3% sched_debug.cpu#6.nr_load_updates
69 ± 3% -12.3% 60 ± 8% sched_debug.cpu#2.cpu_load[0]
693 ± 6% -10.9% 617 ± 2% sched_debug.cfs_rq[0]:/.tg_runnable_contrib
31740 ± 6% -10.5% 28410 ± 2% sched_debug.cfs_rq[0]:/.avg->runnable_avg_sum
566208 ± 7% -10.6% 505972 ± 2% sched_debug.cfs_rq[0]:/.min_vruntime

testbox/testcase/testparams: lkp-snb01/will-it-scale/performance-futex3

0429fbc0bdc297d6 76835b0ebf8a7fe85beb03c751
---------------- --------------------------
11940878 ± 0% -17.1% 9902263 ± 0% will-it-scale.per_thread_ops
11923215 ± 0% -17.3% 9861898 ± 0% will-it-scale.per_process_ops
0.61 ± 0% +12.0% 0.68 ± 0% will-it-scale.scalability
0.85 ± 1% +1283.8% 11.73 ± 0% perf-profile.cpu-cycles.get_futex_key_refs.isra.11.futex_wake.do_futex.sys_futex.system_call_fastpath
235 ± 47% +154.9% 600 ± 26% sched_debug.cfs_rq[25]:/.blocked_load_avg
162 ± 19% -40.2% 97 ± 40% sched_debug.cfs_rq[22]:/.blocked_load_avg
272 ± 35% +130.6% 627 ± 26% sched_debug.cfs_rq[25]:/.tg_load_contrib
900 ± 31% -62.6% 337 ± 46% sched_debug.cpu#29.ttwu_local
192 ± 16% -33.8% 127 ± 31% sched_debug.cfs_rq[22]:/.tg_load_contrib
354 ± 15% +179.4% 991 ± 40% sched_debug.cpu#22.ttwu_count
181 ± 43% -49.9% 90 ± 8% sched_debug.cpu#19.ttwu_local
1491 ± 37% -35.8% 958 ± 24% sched_debug.cpu#26.sched_goidle
524 ± 45% -48.3% 271 ± 18% sched_debug.cfs_rq[26]:/.tg_load_contrib
312620 ± 45% +88.4% 588988 ± 18% sched_debug.cfs_rq[16]:/.spread0
490 ± 49% -50.2% 244 ± 14% sched_debug.cfs_rq[26]:/.blocked_load_avg
3749 ± 27% -39.5% 2268 ± 27% sched_debug.cpu#29.nr_switches
14.06 ± 0% +99.3% 28.02 ± 0% perf-profile.cpu-cycles.futex_wake.do_futex.sys_futex.system_call_fastpath.syscall
4096 ± 24% -38.9% 2502 ± 18% sched_debug.cpu#3.nr_switches
1517 ± 19% -26.3% 1118 ± 18% sched_debug.cpu#3.sched_goidle
3955 ± 18% -18.5% 3225 ± 8% sched_debug.cpu#28.curr->pid
6.22 ± 1% +53.6% 9.55 ± 0% perf-profile.cpu-cycles.get_futex_key.futex_wake.do_futex.sys_futex.system_call_fastpath
51 ± 11% -20.0% 41 ± 3% sched_debug.cpu#0.load
51 ± 11% -20.0% 41 ± 3% sched_debug.cfs_rq[0]:/.load
3865 ± 12% +29.7% 5013 ± 19% sched_debug.cpu#15.sched_goidle
82552 ± 6% -17.5% 68090 ± 3% sched_debug.cpu#0.nr_load_updates
1.24 ± 3% -19.5% 1.00 ± 2% perf-profile.cpu-cycles.drop_futex_key_refs.isra.12.do_futex.sys_futex.system_call_fastpath.syscall
36.98 ± 1% +32.7% 49.09 ± 0% perf-profile.cpu-cycles.do_futex.sys_futex.system_call_fastpath.syscall
43.52 ± 1% +25.0% 54.42 ± 0% perf-profile.cpu-cycles.system_call_fastpath.syscall
54 ± 8% +16.6% 63 ± 8% sched_debug.cpu#16.cpu_load[1]
54 ± 6% +19.8% 65 ± 10% sched_debug.cpu#16.cpu_load[2]
666.39 ± 0% -14.9% 566.95 ± 0% time.user_time
54 ± 5% +23.6% 66 ± 14% sched_debug.cpu#16.cpu_load[4]
4.59 ± 1% -20.6% 3.65 ± 1% perf-profile.cpu-cycles.testcase
4478 ± 5% -12.4% 3921 ± 6% sched_debug.cpu#0.curr->pid
41.87 ± 1% +26.7% 53.06 ± 0% perf-profile.cpu-cycles.sys_futex.system_call_fastpath.syscall
11.37 ± 1% -19.8% 9.12 ± 0% perf-profile.cpu-cycles.system_call_after_swapgs.syscall
13.93 ± 2% -18.7% 11.32 ± 1% perf-profile.cpu-cycles.system_call.syscall
2.46 ± 4% -17.3% 2.04 ± 3% perf-profile.cpu-cycles.sysret_check.syscall
83964 ± 6% -18.1% 68727 ± 4% sched_debug.cfs_rq[0]:/.exec_clock
54 ± 6% +22.7% 66 ± 13% sched_debug.cpu#16.cpu_load[3]
102941 ± 4% +15.5% 118874 ± 2% sched_debug.cfs_rq[16]:/.exec_clock
645 ± 5% -13.2% 560 ± 4% sched_debug.cfs_rq[0]:/.tg_runnable_contrib
29634 ± 5% -13.2% 25712 ± 4% sched_debug.cfs_rq[0]:/.avg->runnable_avg_sum
1087374 ± 6% -13.1% 944559 ± 6% sched_debug.cfs_rq[0]:/.min_vruntime
2464 ± 6% -9.0% 2243 ± 2% numa-meminfo.node1.KernelStack
110191 ± 4% +12.1% 123516 ± 2% sched_debug.cpu#16.nr_load_updates
34745 ± 4% +11.5% 38751 ± 3% sched_debug.cfs_rq[16]:/.avg->runnable_avg_sum
757 ± 4% +11.5% 844 ± 3% sched_debug.cfs_rq[16]:/.tg_runnable_contrib

testbox/testcase/testparams: lkp-sbx04/will-it-scale/performance-futex3

0429fbc0bdc297d6 76835b0ebf8a7fe85beb03c751
---------------- --------------------------
11423732 ± 0% -17.8% 9387203 ± 0% will-it-scale.per_thread_ops
11419511 ± 0% -18.0% 9368303 ± 0% will-it-scale.per_process_ops
0.60 ± 0% +11.9% 0.68 ± 0% will-it-scale.scalability
0.84 ± 3% +1303.0% 11.82 ± 0% perf-profile.cpu-cycles.get_futex_key_refs.isra.11.futex_wake.do_futex.sys_futex.system_call_fastpath
68 ± 41% +125.1% 154 ± 37% sched_debug.cfs_rq[31]:/.blocked_load_avg
76 ± 38% +111.5% 161 ± 35% sched_debug.cfs_rq[31]:/.tg_load_contrib
1603 ± 47% -53.1% 751 ± 35% sched_debug.cpu#6.ttwu_count
937 ± 13% +84.1% 1726 ± 41% sched_debug.cpu#30.ttwu_local
3733 ± 12% +37.0% 5117 ± 23% sched_debug.cpu#14.sched_count
131 ± 17% +146.6% 324 ± 22% sched_debug.cpu#47.ttwu_local
96 ± 12% +106.2% 199 ± 35% sched_debug.cpu#33.ttwu_local
199 ± 32% -50.1% 99 ± 16% sched_debug.cfs_rq[53]:/.blocked_load_avg
210 ± 31% -48.2% 109 ± 12% sched_debug.cfs_rq[53]:/.tg_load_contrib
2354 ± 19% -41.9% 1368 ± 33% sched_debug.cpu#48.nr_switches
47 ± 44% +131.9% 110 ± 43% sched_debug.cfs_rq[33]:/.blocked_load_avg
11 ± 36% -45.7% 6 ± 6% sched_debug.cpu#31.load
2824 ± 19% -37.3% 1771 ± 29% sched_debug.cpu#63.nr_switches
7 ± 5% +61.3% 12 ± 20% sched_debug.cpu#62.cpu_load[0]
2846 ± 18% -37.4% 1783 ± 29% sched_debug.cpu#63.sched_count
183 ± 21% -52.3% 87 ± 33% sched_debug.cfs_rq[46]:/.blocked_load_avg
70 ± 30% +89.3% 132 ± 36% sched_debug.cfs_rq[33]:/.tg_load_contrib
1064 ± 10% -35.2% 689 ± 25% sched_debug.cpu#63.sched_goidle
5806 ± 11% +23.7% 7184 ± 18% sched_debug.cpu#28.nr_switches
14.28 ± 0% +97.8% 28.25 ± 0% perf-profile.cpu-cycles.futex_wake.do_futex.sys_futex.system_call_fastpath.syscall
198 ± 20% -48.4% 102 ± 28% sched_debug.cfs_rq[46]:/.tg_load_contrib
501 ± 5% -49.9% 251 ± 47% sched_debug.cpu#40.ttwu_local
6 ± 6% +40.0% 8 ± 14% sched_debug.cpu#25.cpu_load[1]
803 ± 26% -33.1% 538 ± 12% sched_debug.cpu#53.ttwu_count
11 ± 28% +48.9% 17 ± 15% sched_debug.cpu#51.cpu_load[0]
83 ± 17% +42.7% 119 ± 26% sched_debug.cpu#39.ttwu_local
6 ± 6% +32.0% 8 ± 15% sched_debug.cpu#25.cpu_load[2]
22 ± 23% -30.0% 15 ± 12% sched_debug.cpu#45.cpu_load[1]
2528 ± 7% -19.1% 2045 ± 17% sched_debug.cpu#18.sched_goidle
1219 ± 22% +39.0% 1695 ± 19% sched_debug.cpu#47.sched_count
1208 ± 22% +27.3% 1538 ± 5% sched_debug.cpu#47.nr_switches
944 ± 13% -32.7% 635 ± 15% sched_debug.cpu#40.ttwu_count
19 ± 12% -23.4% 14 ± 5% sched_debug.cpu#45.cpu_load[2]
2946 ± 10% +18.9% 3502 ± 12% sched_debug.cpu#61.curr->pid
561 ± 10% +35.3% 759 ± 5% sched_debug.cpu#47.ttwu_count
6.28 ± 0% +54.7% 9.71 ± 1% perf-profile.cpu-cycles.get_futex_key.futex_wake.do_futex.sys_futex.system_call_fastpath
6 ± 0% +29.2% 7 ± 10% sched_debug.cpu#25.cpu_load[3]
531 ± 12% +31.8% 700 ± 5% sched_debug.cpu#44.ttwu_count
1.27 ± 1% -22.1% 0.99 ± 1% perf-profile.cpu-cycles.drop_futex_key_refs.isra.12.do_futex.sys_futex.system_call_fastpath.syscall
37.44 ± 0% +32.2% 49.50 ± 0% perf-profile.cpu-cycles.do_futex.sys_futex.system_call_fastpath.syscall
1004 ± 5% +24.3% 1247 ± 6% numa-meminfo.node3.PageTables
253 ± 6% +23.2% 312 ± 6% numa-vmstat.node3.nr_page_table_pages
44.13 ± 0% +24.3% 54.84 ± 0% perf-profile.cpu-cycles.system_call_fastpath.syscall
1967 ± 7% -12.4% 1722 ± 5% numa-meminfo.node2.KernelStack
3710 ± 4% -14.0% 3191 ± 1% sched_debug.cpu#31.curr->pid
1141.59 ± 0% -16.2% 956.67 ± 0% time.user_time
4.56 ± 0% -18.7% 3.71 ± 2% perf-profile.cpu-cycles.testcase
42.42 ± 0% +26.2% 53.51 ± 0% perf-profile.cpu-cycles.sys_futex.system_call_fastpath.syscall
436335 ± 2% -9.8% 393521 ± 1% softirqs.RCU
3339 ± 2% +12.7% 3764 ± 9% sched_debug.cpu#54.curr->pid
973122 ± 0% +9.1% 1062051 ± 8% sched_debug.cfs_rq[53]:/.min_vruntime
11.40 ± 0% -19.5% 9.18 ± 0% perf-profile.cpu-cycles.system_call_after_swapgs.syscall
14.13 ± 0% -19.8% 11.34 ± 0% perf-profile.cpu-cycles.system_call.syscall
2.46 ± 0% -19.6% 1.98 ± 2% perf-profile.cpu-cycles.sysret_check.syscall
1820 ± 13% -29.1% 1290 ± 22% sched_debug.cpu#40.nr_switches
8 ± 0% +12.5% 9 ± 0% sched_debug.cpu#16.cpu_load[3]
1630 ± 14% +38.0% 2250 ± 12% sched_debug.cpu#13.sched_goidle
12601 ± 7% -11.4% 11165 ± 3% numa-meminfo.node0.SReclaimable
3149 ± 7% -11.4% 2791 ± 3% numa-vmstat.node0.nr_slab_reclaimable
9845 ± 9% +15.7% 11388 ± 7% numa-meminfo.node1.SReclaimable
2460 ± 9% +15.7% 2846 ± 7% numa-vmstat.node1.nr_slab_reclaimable
1410 ± 5% +9.6% 1545 ± 4% slabinfo.mm_struct.active_objs
3055 ± 12% +14.1% 3484 ± 8% sched_debug.cpu#26.curr->pid
1410 ± 5% +9.6% 1545 ± 4% slabinfo.mm_struct.num_objs
5757 ± 7% +13.3% 6521 ± 6% numa-vmstat.node3.nr_slab_unreclaimable
23031 ± 7% +13.3% 26086 ± 6% numa-meminfo.node3.SUnreclaim

testbox/testcase/testparams: lkp-snb01/will-it-scale/performance-futex4

0429fbc0bdc297d6 76835b0ebf8a7fe85beb03c751
---------------- --------------------------
7502355 ± 0% -11.5% 6637575 ± 1% will-it-scale.per_thread_ops
7513211 ± 0% -10.9% 6692022 ± 0% will-it-scale.per_process_ops
0.65 ± 0% +3.9% 0.68 ± 0% will-it-scale.scalability
0.53 ± 3% +1423.7% 8.04 ± 1% perf-profile.cpu-cycles.get_futex_key_refs.isra.11.futex_wait_setup.futex_wait.do_futex.sys_futex
1699 ± 37% -68.6% 533 ± 35% sched_debug.cpu#25.ttwu_local
513 ± 39% -50.7% 253 ± 38% sched_debug.cfs_rq[27]:/.tg_load_contrib
2846 ± 28% -36.7% 1801 ± 5% sched_debug.cpu#25.ttwu_count
33 ± 45% -54.9% 15 ± 18% sched_debug.cpu#10.load
450 ± 26% +165.3% 1195 ± 45% sched_debug.cpu#23.sched_goidle
18 ± 8% +81.1% 33 ± 38% sched_debug.cpu#29.cpu_load[2]
5401 ± 26% -49.1% 2748 ± 8% sched_debug.cpu#25.nr_switches
17 ± 14% +71.4% 30 ± 40% sched_debug.cfs_rq[14]:/.load
37 ± 23% -50.7% 18 ± 17% sched_debug.cpu#26.cpu_load[0]
493 ± 41% -52.4% 234 ± 42% sched_debug.cfs_rq[27]:/.blocked_load_avg
17 ± 4% +51.4% 26 ± 27% sched_debug.cpu#29.cpu_load[3]
2116 ± 32% +87.3% 3963 ± 44% sched_debug.cpu#28.nr_switches
752 ± 11% +78.1% 1340 ± 32% sched_debug.cpu#28.sched_goidle
11262 ± 14% -25.6% 8382 ± 20% sched_debug.cpu#10.nr_switches
3179 ± 4% +35.0% 4292 ± 19% sched_debug.cpu#29.curr->pid
4945 ± 14% -25.4% 3687 ± 21% sched_debug.cpu#10.sched_goidle
4.60 ± 1% +56.9% 7.21 ± 0% perf-profile.cpu-cycles.get_futex_key.futex_wait_setup.futex_wait.do_futex.sys_futex
16 ± 2% +33.8% 21 ± 19% sched_debug.cpu#29.cpu_load[4]
22 ± 27% -36.4% 14 ± 14% sched_debug.cpu#10.cpu_load[0]
1676 ± 5% -18.0% 1374 ± 15% numa-meminfo.node0.PageTables
418 ± 5% -17.8% 343 ± 15% numa-vmstat.node0.nr_page_table_pages
16 ± 2% +24.6% 20 ± 9% sched_debug.cpu#13.load
16 ± 0% +25.0% 20 ± 9% sched_debug.cfs_rq[13]:/.load
16 ± 0% +26.6% 20 ± 9% sched_debug.cpu#13.cpu_load[0]
3345 ± 14% +27.5% 4266 ± 8% sched_debug.cpu#13.ttwu_count
10925 ± 4% -15.6% 9218 ± 3% slabinfo.kmalloc-256.active_objs
31.80 ± 1% +24.9% 39.74 ± 0% perf-profile.cpu-cycles.futex_wait_setup.futex_wait.do_futex.sys_futex.system_call_fastpath
11691 ± 4% -14.7% 9976 ± 3% slabinfo.kmalloc-256.num_objs
465.75 ± 0% -11.2% 413.51 ± 0% time.user_time
16 ± 0% +15.6% 18 ± 6% sched_debug.cpu#13.cpu_load[1]
7.28 ± 1% -11.7% 6.43 ± 2% perf-profile.cpu-cycles.system_call_after_swapgs.syscall
8.14 ± 0% -11.1% 7.23 ± 1% perf-profile.cpu-cycles.system_call.syscall
1.77 ± 3% -10.1% 1.59 ± 2% perf-profile.cpu-cycles.sysret_check.syscall
1.97 ± 2% -8.5% 1.80 ± 2% perf-profile.cpu-cycles._raw_spin_unlock.futex_wait_setup.futex_wait.do_futex.sys_futex
3865 ± 13% +15.8% 4474 ± 3% numa-vmstat.node1.nr_anon_pages
15441 ± 13% +15.9% 17889 ± 3% numa-meminfo.node1.AnonPages
6.95 ± 2% -11.4% 6.16 ± 0% perf-profile.cpu-cycles._raw_spin_lock.futex_wait_setup.futex_wait.do_futex.sys_futex
17320 ± 12% -14.0% 14897 ± 3% numa-meminfo.node0.Active(anon)
4329 ± 12% -14.0% 3723 ± 3% numa-vmstat.node0.nr_active_anon
16 ± 0% +12.5% 18 ± 6% sched_debug.cpu#12.cpu_load[4]
12.79 ± 1% -9.9% 11.52 ± 0% perf-profile.cpu-cycles.hash_futex.futex_wait.do_futex.sys_futex.system_call_fastpath
16945 ± 12% -14.2% 14539 ± 3% numa-meminfo.node0.AnonPages
4235 ± 12% -14.2% 3634 ± 3% numa-vmstat.node0.nr_anon_pages

testbox/testcase/testparams: nhm4/will-it-scale/performance-futex4

0429fbc0bdc297d6 76835b0ebf8a7fe85beb03c751
---------------- --------------------------
7653177 ± 0% -6.5% 7158623 ± 0% will-it-scale.per_thread_ops
7616372 ± 0% -6.5% 7119979 ± 0% will-it-scale.per_process_ops
0.70 ± 0% +2.4% 0.71 ± 0% will-it-scale.scalability
1.31 ± 0% +385.8% 6.37 ± 2% perf-profile.cpu-cycles.get_futex_key_refs.isra.11.futex_wait_setup.futex_wait.do_futex.sys_futex
847 ± 22% -39.7% 510 ± 21% sched_debug.cfs_rq[3]:/.blocked_load_avg
979 ± 18% -34.4% 642 ± 16% sched_debug.cfs_rq[3]:/.tg_load_contrib
139 ± 18% -25.7% 103 ± 17% sched_debug.cpu#6.load
5.35 ± 2% +25.7% 6.73 ± 2% perf-profile.cpu-cycles.get_futex_key.futex_wait_setup.futex_wait.do_futex.sys_futex
163 ± 11% -17.2% 135 ± 11% sched_debug.cpu#4.cpu_load[0]
30.79 ± 1% +15.4% 35.54 ± 1% perf-profile.cpu-cycles.futex_wait_setup.futex_wait.do_futex.sys_futex.system_call_fastpath
146.40 ± 2% -36.7% 92.65 ± 2% time.user_time
2734 ± 5% +10.7% 3027 ± 7% sched_debug.cpu#0.curr->pid
7.93 ± 2% -8.3% 7.27 ± 3% perf-profile.cpu-cycles.system_call_after_swapgs.syscall
7.20 ± 2% -10.7% 6.44 ± 1% perf-profile.cpu-cycles.system_call.syscall
2.28 ± 3% -8.6% 2.08 ± 3% perf-profile.cpu-cycles.sysret_check.syscall
12.98 ± 1% -10.2% 11.65 ± 2% perf-profile.cpu-cycles.hash_futex.futex_wait.do_futex.sys_futex.system_call_fastpath

testbox/testcase/testparams: lkp-sbx04/will-it-scale/performance-futex4

0429fbc0bdc297d6 76835b0ebf8a7fe85beb03c751
---------------- --------------------------
7139956 ± 0% -11.0% 6357641 ± 0% will-it-scale.per_thread_ops
7161715 ± 0% -11.3% 6351262 ± 0% will-it-scale.per_process_ops
0.64 ± 0% +4.5% 0.67 ± 0% will-it-scale.scalability
379 ± 48% -72.3% 105 ± 35% sched_debug.cpu#37.ttwu_local
0.53 ± 3% +1395.3% 7.92 ± 1% perf-profile.cpu-cycles.get_futex_key_refs.isra.11.futex_wait_setup.futex_wait.do_futex.sys_futex
860 ± 22% +44.8% 1245 ± 16% sched_debug.cpu#17.ttwu_local
1025 ± 43% +95.4% 2003 ± 42% sched_debug.cpu#32.sched_count
1010 ± 43% +96.8% 1989 ± 43% sched_debug.cpu#32.nr_switches
361 ± 28% -46.7% 192 ± 19% sched_debug.cfs_rq[60]:/.blocked_load_avg
368 ± 27% -45.8% 199 ± 20% sched_debug.cfs_rq[60]:/.tg_load_contrib
525 ± 20% +63.2% 857 ± 30% sched_debug.cpu#48.sched_goidle
1292 ± 18% +109.9% 2713 ± 37% sched_debug.cpu#48.nr_switches
375 ± 30% +60.9% 603 ± 30% sched_debug.cpu#32.sched_goidle
1903 ± 17% +35.2% 2572 ± 21% sched_debug.cpu#23.ttwu_count
888 ± 33% +53.7% 1365 ± 21% sched_debug.cpu#34.sched_count
777 ± 21% +52.7% 1186 ± 7% sched_debug.cpu#21.ttwu_local
504 ± 21% +44.3% 727 ± 11% sched_debug.cpu#55.sched_goidle
1410 ± 16% -46.8% 749 ± 11% sched_debug.cpu#60.ttwu_count
622 ± 21% -44.8% 343 ± 17% sched_debug.cpu#47.sched_goidle
148 ± 36% +58.8% 235 ± 22% sched_debug.cfs_rq[49]:/.blocked_load_avg
878 ± 33% +54.0% 1352 ± 21% sched_debug.cpu#34.nr_switches
645 ± 8% +98.8% 1283 ± 42% sched_debug.cpu#48.ttwu_count
157 ± 34% +56.0% 245 ± 21% sched_debug.cfs_rq[49]:/.tg_load_contrib
91 ± 34% +89.1% 173 ± 37% sched_debug.cfs_rq[41]:/.blocked_load_avg
1211 ± 20% +53.1% 1855 ± 12% sched_debug.cpu#55.nr_switches
109 ± 26% +74.7% 191 ± 34% sched_debug.cfs_rq[41]:/.tg_load_contrib
2418 ± 25% +62.7% 3934 ± 17% numa-vmstat.node3.nr_active_anon
9680 ± 25% +62.6% 15742 ± 17% numa-meminfo.node3.Active(anon)
744 ± 32% -36.1% 475 ± 19% sched_debug.cpu#45.ttwu_count
546 ± 19% +34.6% 735 ± 14% sched_debug.cpu#61.sched_goidle
11 ± 14% -25.0% 8 ± 13% sched_debug.cpu#54.load
1912 ± 7% +48.1% 2831 ± 10% sched_debug.cpu#17.ttwu_count
4.64 ± 0% +56.4% 7.26 ± 1% perf-profile.cpu-cycles.get_futex_key.futex_wait_setup.futex_wait.do_futex.sys_futex
14 ± 8% +28.8% 19 ± 8% sched_debug.cpu#46.cpu_load[0]
4908 ± 6% +34.3% 6593 ± 8% sched_debug.cpu#17.nr_switches
2287 ± 3% +29.7% 2965 ± 6% sched_debug.cpu#17.sched_goidle
66715375 ± 27% -38.5% 41040216 ± 25% cpuidle.C1-SNB.time
1866 ± 7% -29.1% 1323 ± 9% sched_debug.cpu#60.nr_switches
1877 ± 7% -21.7% 1470 ± 12% sched_debug.cpu#60.sched_count
3412 ± 3% +34.1% 4577 ± 5% numa-vmstat.node2.nr_anon_pages
13651 ± 3% +34.1% 18306 ± 5% numa-meminfo.node2.AnonPages
1859 ± 10% +52.1% 2828 ± 21% sched_debug.cpu#62.sched_count
30187 ± 9% +22.8% 37073 ± 8% numa-meminfo.node3.Active
199 ± 2% +27.4% 253 ± 7% sched_debug.cpu#35.ttwu_count
307 ± 27% -28.0% 221 ± 17% sched_debug.cfs_rq[56]:/.tg_load_contrib
13213 ± 6% -14.4% 11313 ± 14% numa-meminfo.node2.SReclaimable
3302 ± 6% -14.4% 2828 ± 14% numa-vmstat.node2.nr_slab_reclaimable
32.22 ± 0% +24.4% 40.07 ± 1% perf-profile.cpu-cycles.futex_wait_setup.futex_wait.do_futex.sys_futex.system_call_fastpath
794.01 ± 0% -11.4% 703.26 ± 0% time.user_time
27693 ± 4% +11.5% 30884 ± 3% numa-vmstat.node3.nr_file_pages
110774 ± 4% +11.5% 123540 ± 3% numa-meminfo.node3.FilePages
7.30 ± 0% -14.6% 6.24 ± 0% perf-profile.cpu-cycles.system_call_after_swapgs.syscall
122283 ± 3% -10.2% 109866 ± 3% numa-meminfo.node2.FilePages
30570 ± 3% -10.2% 27466 ± 3% numa-vmstat.node2.nr_file_pages
1.78 ± 2% -13.1% 1.55 ± 3% perf-profile.cpu-cycles.sysret_check.syscall
2.04 ± 3% -12.0% 1.79 ± 1% perf-profile.cpu-cycles._raw_spin_unlock.futex_wait_setup.futex_wait.do_futex.sys_futex
7.00 ± 0% -12.9% 6.09 ± 2% perf-profile.cpu-cycles._raw_spin_lock.futex_wait_setup.futex_wait.do_futex.sys_futex
14 ± 5% +15.3% 17 ± 4% sched_debug.cpu#46.cpu_load[1]
1363 ± 2% +10.6% 1507 ± 3% slabinfo.mm_struct.active_objs
12.81 ± 1% -12.0% 11.27 ± 1% perf-profile.cpu-cycles.hash_futex.futex_wait.do_futex.sys_futex.system_call_fastpath

testbox/testcase/testparams: lkp-g5/will-it-scale/performance-futex3

0429fbc0bdc297d6 76835b0ebf8a7fe85beb03c751
---------------- --------------------------
8319207 ± 0% -9.9% 7498648 ± 0% will-it-scale.per_thread_ops
8330959 ± 0% -9.9% 7503988 ± 0% will-it-scale.per_process_ops
1800 ± 0% -10.3% 1615 ± 0% will-it-scale.time.user_time
0.58 ± 0% +7.4% 0.63 ± 0% will-it-scale.scalability
5374 ± 0% +3.4% 5557 ± 0% will-it-scale.time.system_time
2.37 ± 11% +335.3% 10.30 ± 10% perf-profile.cpu-cycles.get_futex_key_refs.isra.11.futex_wake.do_futex.sys_futex.system_call_fastpath
13 ± 31% +250.0% 45 ± 44% sched_debug.cfs_rq[13]:/.tg_load_contrib
909 ± 46% +124.5% 2041 ± 14% sched_debug.cpu#65.ttwu_local
472 ± 12% +184.0% 1340 ± 49% sched_debug.cpu#2.sched_goidle
1642 ± 19% -55.8% 726 ± 43% sched_debug.cpu#27.ttwu_local
2326 ± 21% -70.9% 676 ± 38% sched_debug.cpu#32.ttwu_local
351 ± 23% +81.6% 637 ± 35% sched_debug.cpu#23.ttwu_local
1674 ± 47% +151.0% 4203 ± 33% numa-meminfo.node6.Active(anon)
418 ± 48% +151.3% 1050 ± 33% numa-vmstat.node6.nr_active_anon
1754 ± 46% +144.9% 4297 ± 33% numa-meminfo.node6.AnonPages
438 ± 46% +145.0% 1073 ± 33% numa-vmstat.node6.nr_anon_pages
4561 ± 27% -70.9% 1328 ± 27% sched_debug.cpu#32.ttwu_count
1261 ± 15% +145.4% 3094 ± 46% sched_debug.cpu#2.nr_switches
1 ± 35% +87.5% 2 ± 20% sched_debug.cfs_rq[28]:/.nr_spread_over
2285 ± 23% +38.1% 3155 ± 27% sched_debug.cpu#23.nr_switches
1621 ± 2% -47.2% 855 ± 41% sched_debug.cpu#29.ttwu_local
10 ± 4% +86.3% 19 ± 33% sched_debug.cfs_rq[3]:/.tg_load_contrib
351 ± 30% +64.5% 578 ± 31% sched_debug.cpu#24.ttwu_local
2931 ± 11% -48.9% 1499 ± 22% sched_debug.cpu#27.ttwu_count
1044 ± 24% +42.4% 1486 ± 28% sched_debug.cpu#23.sched_goidle
8008 ± 6% -57.6% 3395 ± 25% sched_debug.cpu#32.nr_switches
362 ± 30% +77.4% 643 ± 34% sched_debug.cpu#20.ttwu_local
21 ± 27% +66.7% 35 ± 26% sched_debug.cfs_rq[100]:/.tg_load_contrib
3168 ± 18% -50.3% 1573 ± 34% sched_debug.cpu#29.ttwu_count
426 ± 41% +57.4% 671 ± 29% sched_debug.cpu#34.sched_goidle
18 ± 37% +114.3% 40 ± 25% sched_debug.cfs_rq[1]:/.tg_load_contrib
985 ± 30% +56.2% 1538 ± 19% sched_debug.cpu#22.ttwu_count
945 ± 28% +56.7% 1482 ± 32% sched_debug.cpu#21.ttwu_count
22 ± 38% +57.7% 35 ± 36% sched_debug.cfs_rq[103]:/.tg_load_contrib
389 ± 18% +89.7% 738 ± 8% sched_debug.cpu#87.ttwu_local
32 ± 41% +74.7% 56 ± 26% sched_debug.cfs_rq[64]:/.blocked_load_avg
887 ± 24% +59.4% 1413 ± 32% sched_debug.cpu#20.ttwu_count
6242 ± 9% -42.9% 3562 ± 17% sched_debug.cpu#27.nr_switches
3293 ± 18% -54.7% 1493 ± 25% sched_debug.cpu#32.sched_goidle
37 ± 36% +70.9% 63 ± 19% sched_debug.cfs_rq[64]:/.tg_load_contrib
759 ± 18% +75.3% 1330 ± 18% sched_debug.cpu#87.ttwu_count
919 ± 28% +54.2% 1416 ± 32% sched_debug.cpu#23.ttwu_count
183 ± 35% -65.8% 62 ± 38% sched_debug.cfs_rq[121]:/.blocked_load_avg
185 ± 34% -64.6% 65 ± 35% sched_debug.cfs_rq[121]:/.tg_load_contrib
355 ± 25% +74.0% 618 ± 35% sched_debug.cpu#21.ttwu_local
538 ± 17% +66.1% 893 ± 33% sched_debug.cpu#111.sched_count
1784 ± 6% +48.9% 2657 ± 27% sched_debug.cpu#2.ttwu_count
1986 ± 5% +70.2% 3379 ± 14% sched_debug.cpu#65.sched_goidle
2955 ± 9% -45.5% 1609 ± 18% sched_debug.cpu#27.sched_goidle
1149 ± 38% +56.4% 1796 ± 24% sched_debug.cpu#34.nr_switches
23615 ± 21% -46.6% 12602 ± 24% sched_debug.cpu#88.sched_count
703 ± 18% -24.4% 532 ± 12% sched_debug.cpu#48.ttwu_count
6768 ± 15% -37.6% 4221 ± 20% sched_debug.cpu#29.nr_switches
4471 ± 7% +66.1% 7429 ± 13% sched_debug.cpu#65.nr_switches
644953 ± 7% -44.9% 355582 ± 21% sched_debug.cfs_rq[94]:/.min_vruntime
728 ± 8% +11.1% 809 ± 9% sched_debug.cpu#55.nr_switches
235 ± 11% +60.7% 378 ± 32% sched_debug.cpu#115.ttwu_count
2547 ± 8% +34.0% 3411 ± 23% sched_debug.cpu#18.nr_switches
268 ± 14% +38.1% 370 ± 23% sched_debug.cpu#102.sched_goidle
1480 ± 5% +59.4% 2358 ± 24% numa-vmstat.node3.nr_slab_reclaimable
5921 ± 5% +59.4% 9436 ± 24% numa-meminfo.node3.SReclaimable
529 ± 17% +32.9% 703 ± 19% sched_debug.cpu#111.nr_switches
8 ± 19% +55.8% 13 ± 21% sched_debug.cpu#41.cpu_load[0]
845 ± 8% +50.6% 1272 ± 4% sched_debug.cpu#87.sched_goidle
3208 ± 15% -40.0% 1924 ± 23% sched_debug.cpu#29.sched_goidle
1836 ± 5% +52.4% 2798 ± 11% sched_debug.cpu#87.nr_switches
2424 ± 31% +43.9% 3489 ± 11% sched_debug.cpu#65.ttwu_count
167 ± 16% +33.1% 222 ± 17% sched_debug.cpu#103.ttwu_count
916 ± 15% +50.5% 1379 ± 15% sched_debug.cpu#84.ttwu_count
161 ± 8% +41.6% 229 ± 14% sched_debug.cpu#51.ttwu_local
12493 ± 19% -31.2% 8594 ± 14% sched_debug.cfs_rq[122]:/.exec_clock
153 ± 3% +23.9% 190 ± 19% sched_debug.cpu#55.ttwu_local
9141 ± 4% +29.4% 11831 ± 10% sched_debug.cfs_rq[126]:/.avg->runnable_avg_sum
230 ± 17% +31.1% 302 ± 18% sched_debug.cpu#111.sched_goidle
198 ± 3% +29.7% 256 ± 10% sched_debug.cfs_rq[126]:/.tg_runnable_contrib
652664 ± 21% -36.4% 415054 ± 15% meminfo.Committed_AS
1196 ± 8% +34.2% 1606 ± 24% sched_debug.cpu#18.sched_goidle
1583 ± 8% +26.2% 1998 ± 12% sched_debug.cpu#80.ttwu_count
11 ± 12% -34.1% 7 ± 15% sched_debug.cfs_rq[9]:/.load
11 ± 19% -34.1% 7 ± 15% sched_debug.cpu#9.cpu_load[0]
11 ± 12% -34.1% 7 ± 15% sched_debug.cpu#9.load
3032 ± 5% +32.3% 4013 ± 11% sched_debug.cpu#126.curr->pid
6039 ± 49% -49.8% 3033 ± 13% sched_debug.cpu#90.ttwu_count
442 ± 9% -34.9% 288 ± 11% sched_debug.cpu#62.ttwu_local
954 ± 31% +68.0% 1604 ± 46% sched_debug.cpu#81.ttwu_count
251 ± 19% +39.5% 350 ± 28% sched_debug.cpu#36.sched_goidle
778 ± 13% +25.7% 978 ± 10% sched_debug.cpu#53.nr_switches
406 ± 9% -35.1% 263 ± 9% sched_debug.cpu#41.ttwu_local
12511 ± 15% +18.7% 14847 ± 13% numa-meminfo.node6.Active
646 ± 14% +38.6% 896 ± 27% sched_debug.cpu#115.nr_switches
1802 ± 15% +39.0% 2505 ± 13% sched_debug.cpu#86.nr_switches
838 ± 16% +38.2% 1159 ± 12% sched_debug.cpu#86.sched_goidle
655 ± 14% +38.1% 904 ± 27% sched_debug.cpu#115.sched_count
660 ± 14% +51.7% 1001 ± 42% sched_debug.cpu#119.sched_count
8 ± 14% +38.0% 11 ± 19% sched_debug.cpu#41.cpu_load[1]
527 ± 16% -34.3% 346 ± 18% sched_debug.cpu#96.ttwu_local
354 ± 22% +60.8% 569 ± 22% sched_debug.cpu#18.ttwu_local
8 ± 10% -25.0% 6 ± 0% sched_debug.cpu#17.cpu_load[0]
8 ± 10% -25.0% 6 ± 0% sched_debug.cfs_rq[17]:/.runnable_load_avg
8 ± 5% -28.0% 6 ± 0% sched_debug.cpu#17.cpu_load[1]
8 ± 5% -28.0% 6 ± 0% sched_debug.cpu#17.cpu_load[2]
8 ± 10% -25.0% 6 ± 0% sched_debug.cpu#17.cpu_load[4]
8 ± 10% -25.0% 6 ± 0% sched_debug.cfs_rq[17]:/.load
8 ± 10% -25.0% 6 ± 0% sched_debug.cpu#17.cpu_load[3]
8 ± 10% -25.0% 6 ± 0% sched_debug.cpu#17.load
10464 ± 8% -32.9% 7025 ± 17% sched_debug.cfs_rq[94]:/.avg->runnable_avg_sum
227 ± 8% -33.1% 152 ± 17% sched_debug.cfs_rq[94]:/.tg_runnable_contrib
3279 ± 11% +13.7% 3729 ± 11% sched_debug.cpu#125.curr->pid
2956 ± 4% +17.5% 3474 ± 9% sched_debug.cpu#127.curr->pid
637 ± 9% -25.4% 475 ± 18% sched_debug.cpu#62.ttwu_count
9187 ± 1% +21.9% 11201 ± 10% sched_debug.cfs_rq[127]:/.avg->runnable_avg_sum
8 ± 5% -19.2% 7 ± 10% sched_debug.cpu#9.cpu_load[4]
200 ± 1% +21.4% 243 ± 10% sched_debug.cfs_rq[127]:/.tg_runnable_contrib
611 ± 8% -16.8% 509 ± 6% sched_debug.cpu#41.sched_goidle
23669 ± 14% +18.0% 27933 ± 10% numa-meminfo.node3.Slab
753533 ± 11% +16.9% 880591 ± 14% sched_debug.cfs_rq[125]:/.min_vruntime
675885 ± 5% +36.5% 922395 ± 8% sched_debug.cfs_rq[126]:/.min_vruntime
1557 ± 0% -18.8% 1264 ± 6% sched_debug.cpu#41.nr_switches
1572 ± 0% -18.8% 1277 ± 5% sched_debug.cpu#41.sched_count
9 ± 4% -22.4% 7 ± 14% sched_debug.cpu#9.cpu_load[2]
10 ± 8% -25.0% 7 ± 14% sched_debug.cpu#9.cpu_load[1]
428 ± 19% -27.8% 309 ± 15% sched_debug.cpu#54.sched_goidle
3838 ± 1% -21.8% 3000 ± 5% sched_debug.cpu#122.curr->pid
772 ± 3% +15.6% 893 ± 12% numa-vmstat.node4.nr_alloc_batch
266 ± 13% +38.2% 368 ± 35% sched_debug.cpu#119.sched_goidle
1763 ± 2% -16.5% 1472 ± 10% numa-vmstat.node6.nr_slab_reclaimable
7054 ± 2% -16.5% 5889 ± 9% numa-meminfo.node6.SReclaimable
1800.43 ± 0% -10.3% 1615.50 ± 0% time.user_time
1018 ± 17% -23.4% 780 ± 11% sched_debug.cpu#54.nr_switches
10904 ± 6% -15.2% 9246 ± 6% sched_debug.cfs_rq[122]:/.avg->runnable_avg_sum
237 ± 6% -15.3% 201 ± 6% sched_debug.cfs_rq[122]:/.tg_runnable_contrib
461 ± 4% +36.1% 627 ± 27% sched_debug.cpu#43.sched_goidle
488 ± 9% -20.3% 389 ± 11% sched_debug.cpu#58.ttwu_local
974 ± 18% -25.6% 725 ± 11% numa-meminfo.node7.PageTables
57552 ± 7% +14.9% 66133 ± 6% sched_debug.cfs_rq[69]:/.exec_clock
3658 ± 6% +15.7% 4230 ± 11% sched_debug.cpu#80.nr_switches
794 ± 6% +9.3% 867 ± 7% numa-vmstat.node0.nr_alloc_batch
331 ± 14% +26.6% 419 ± 12% sched_debug.cpu#59.ttwu_local
75174 ± 6% -12.1% 66087 ± 6% sched_debug.cfs_rq[101]:/.exec_clock
164 ± 10% +25.4% 206 ± 20% sched_debug.cpu#114.ttwu_local
1618315 ± 5% +2.4% 1656815 ± 5% sched_debug.cfs_rq[58]:/.min_vruntime

testbox/testcase/testparams: lkp-wsx01/will-it-scale/performance-futex3

0429fbc0bdc297d6 76835b0ebf8a7fe85beb03c751
---------------- --------------------------
9211097 ± 0% -10.2% 8268370 ± 0% will-it-scale.per_thread_ops
9204866 ± 0% -10.2% 8266680 ± 0% will-it-scale.per_process_ops
0.63 ± 0% +6.3% 0.66 ± 0% will-it-scale.scalability
2.01 ± 1% +365.3% 9.35 ± 1% perf-profile.cpu-cycles.get_futex_key_refs.isra.11.futex_wake.do_futex.sys_futex.system_call_fastpath
1 ± 34% +200.0% 4 ± 17% sched_debug.cpu#63.nr_uninterruptible
404 ± 23% +143.3% 984 ± 27% sched_debug.cpu#48.sched_count
434 ± 32% -68.0% 139 ± 20% sched_debug.cpu#61.ttwu_local
5335 ± 48% -54.4% 2433 ± 43% sched_debug.cpu#20.sched_goidle
576 ± 26% -46.8% 306 ± 34% sched_debug.cpu#55.ttwu_count
161 ± 27% +159.0% 418 ± 29% sched_debug.cpu#48.sched_goidle
11216 ± 46% -49.6% 5648 ± 38% sched_debug.cpu#20.nr_switches
11237 ± 46% -49.5% 5669 ± 38% sched_debug.cpu#20.sched_count
395 ± 23% +146.2% 972 ± 27% sched_debug.cpu#48.nr_switches
1461 ± 31% -46.9% 776 ± 23% sched_debug.cpu#42.ttwu_local
926 ± 19% -41.0% 546 ± 24% sched_debug.cpu#61.ttwu_count
856 ± 34% -50.2% 426 ± 31% sched_debug.cpu#58.ttwu_count
498 ± 23% -54.4% 227 ± 25% sched_debug.cpu#55.sched_goidle
3667 ± 25% -38.3% 2263 ± 18% sched_debug.cpu#42.nr_switches
3680 ± 25% -38.1% 2279 ± 18% sched_debug.cpu#42.sched_count
1593 ± 17% -45.1% 875 ± 20% sched_debug.cpu#61.nr_switches
155 ± 36% +182.5% 438 ± 40% sched_debug.cpu#48.ttwu_count
1604 ± 17% -44.8% 885 ± 20% sched_debug.cpu#61.sched_count
1245 ± 27% -52.5% 591 ± 21% sched_debug.cpu#55.sched_count
534 ± 11% +63.0% 871 ± 35% sched_debug.cpu#45.nr_switches
1232 ± 27% -52.8% 581 ± 21% sched_debug.cpu#55.nr_switches
545 ± 11% +62.0% 883 ± 35% sched_debug.cpu#45.sched_count
402 ± 22% +45.4% 585 ± 29% sched_debug.cpu#70.sched_goidle
595 ± 21% +108.8% 1242 ± 40% sched_debug.cpu#70.ttwu_count
575 ± 24% -39.7% 346 ± 21% sched_debug.cpu#61.sched_goidle
10 ± 26% -44.0% 5 ± 18% sched_debug.cpu#72.cpu_load[0]
7.76 ± 0% +15.3% 8.95 ± 0% perf-profile.cpu-cycles.get_futex_key.futex_wake.do_futex.sys_futex.system_call_fastpath
3149 ± 11% -15.0% 2677 ± 10% sched_debug.cpu#71.curr->pid
4.52 ± 2% -11.5% 4.00 ± 1% perf-profile.cpu-cycles.do_futex.sys_futex.system_call_fastpath.syscall
1401.34 ± 0% -10.1% 1260.10 ± 0% time.user_time
4.92 ± 0% -11.5% 4.36 ± 1% perf-profile.cpu-cycles.sys_futex.system_call_fastpath.syscall
12.56 ± 0% -12.2% 11.03 ± 1% perf-profile.cpu-cycles.system_call_after_swapgs.syscall
11.31 ± 0% -11.2% 10.05 ± 0% perf-profile.cpu-cycles.system_call.syscall
3.76 ± 0% -13.5% 3.25 ± 0% perf-profile.cpu-cycles.sysret_check.syscall
1.21 ± 1% -11.4% 1.08 ± 1% perf-profile.cpu-cycles.drop_futex_key_refs.isra.12.futex_wake.do_futex.sys_futex.system_call_fastpath
20.50 ± 0% -10.9% 18.25 ± 0% perf-profile.cpu-cycles.syscall
3007 ± 6% -11.8% 2654 ± 13% sched_debug.cpu#38.curr->pid

testbox/testcase/testparams: nhm4/will-it-scale/performance-futex3

0429fbc0bdc297d6 76835b0ebf8a7fe85beb03c751
---------------- --------------------------
2:5 -40% :5 kmsg.Spurious_LAPIC_timer_interrupt_on_cpu
11324367 ± 0% -12.0% 9969101 ± 0% will-it-scale.per_thread_ops
11271283 ± 0% -12.1% 9911001 ± 0% will-it-scale.per_process_ops
0.66 ± 0% +9.6% 0.72 ± 0% will-it-scale.scalability
1.67 ± 1% +509.7% 10.17 ± 3% perf-profile.cpu-cycles.get_futex_key_refs.isra.11.futex_wake.do_futex.sys_futex.system_call_fastpath
1127 ± 31% -42.3% 651 ± 8% sched_debug.cfs_rq[3]:/.blocked_load_avg
799 ± 19% -34.8% 521 ± 26% sched_debug.cfs_rq[2]:/.blocked_load_avg
906 ± 17% -30.8% 627 ± 21% sched_debug.cfs_rq[2]:/.tg_load_contrib
1245 ± 28% -38.8% 762 ± 8% sched_debug.cfs_rq[3]:/.tg_load_contrib
0.47 ± 7% +119.7% 1.02 ± 5% perf-profile.cpu-cycles.ret_from_sys_call.syscall
16.48 ± 1% +56.0% 25.71 ± 2% perf-profile.cpu-cycles.futex_wake.do_futex.sys_futex.system_call_fastpath.syscall
153 ± 13% -35.3% 99 ± 22% sched_debug.cpu#6.load
969 ± 22% +48.4% 1438 ± 20% sched_debug.cfs_rq[4]:/.tg_load_contrib
133 ± 11% -28.7% 95 ± 22% sched_debug.cfs_rq[6]:/.load
827 ± 26% +55.5% 1286 ± 23% sched_debug.cfs_rq[4]:/.blocked_load_avg
6.55 ± 3% +22.3% 8.01 ± 1% perf-profile.cpu-cycles.get_futex_key.futex_wake.do_futex.sys_futex.system_call_fastpath
70 ± 10% +21.7% 85 ± 9% sched_debug.cpu#2.cpu_load[3]
68 ± 12% +23.5% 84 ± 9% sched_debug.cpu#2.cpu_load[4]
114 ± 4% -21.6% 89 ± 12% sched_debug.cpu#6.cpu_load[1]
103 ± 5% -21.8% 81 ± 9% sched_debug.cpu#6.cpu_load[2]
214345 ± 13% +31.5% 281773 ± 13% sched_debug.cfs_rq[2]:/.min_vruntime
37071 ± 15% +36.8% 50702 ± 15% sched_debug.cfs_rq[2]:/.exec_clock
1.22 ± 4% -15.5% 1.03 ± 9% perf-profile.cpu-cycles.drop_futex_key_refs.isra.12.do_futex.sys_futex.system_call_fastpath.syscall
42.44 ± 0% +13.8% 48.30 ± 2% perf-profile.cpu-cycles.do_futex.sys_futex.system_call_fastpath.syscall
50.65 ± 0% +11.9% 56.69 ± 2% perf-profile.cpu-cycles.system_call_fastpath.syscall
199.04 ± 0% -21.5% 156.27 ± 2% time.user_time
48.57 ± 0% +11.3% 54.04 ± 2% perf-profile.cpu-cycles.sys_futex.system_call_fastpath.syscall
38234 ± 5% -17.6% 31518 ± 1% softirqs.RCU
82318 ± 11% -20.7% 65237 ± 13% sched_debug.cpu#1.nr_load_updates
95 ± 7% -19.9% 76 ± 8% sched_debug.cpu#6.cpu_load[4]
12.54 ± 1% -14.4% 10.73 ± 3% perf-profile.cpu-cycles.system_call_after_swapgs.syscall
98 ± 7% -20.5% 78 ± 8% sched_debug.cpu#6.cpu_load[3]
11.27 ± 1% -16.4% 9.43 ± 1% perf-profile.cpu-cycles.system_call.syscall
324067 ± 8% -18.2% 265084 ± 14% sched_debug.cfs_rq[6]:/.min_vruntime
58381 ± 10% -23.2% 44812 ± 18% sched_debug.cfs_rq[6]:/.exec_clock
3.23 ± 2% -15.0% 2.75 ± 2% perf-profile.cpu-cycles.sysret_check.syscall
18.21 ± 1% -13.6% 15.74 ± 2% perf-profile.cpu-cycles.hash_futex.do_futex.sys_futex.system_call_fastpath.syscall
3053 ± 8% -18.2% 2499 ± 13% sched_debug.cpu#6.curr->pid
21140 ± 9% +17.5% 24849 ± 8% sched_debug.cfs_rq[2]:/.avg->runnable_avg_sum
27368 ± 7% -13.6% 23638 ± 8% sched_debug.cfs_rq[6]:/.avg->runnable_avg_sum
596 ± 7% -13.6% 515 ± 8% sched_debug.cfs_rq[6]:/.tg_runnable_contrib
39113 ± 2% +10.7% 43292 ± 4% cpuidle.C6-NHM.usage

testbox/testcase/testparams: ivb42/will-it-scale/performance-futex3

0429fbc0bdc297d6 76835b0ebf8a7fe85beb03c751
---------------- --------------------------
11973558 ± 0% -16.3% 10023607 ± 0% will-it-scale.per_thread_ops
11960916 ± 0% -16.5% 9989502 ± 0% will-it-scale.per_process_ops
0.61 ± 0% +9.2% 0.66 ± 0% will-it-scale.scalability
0.83 ± 4% +1394.0% 12.45 ± 0% perf-profile.cpu-cycles.get_futex_key_refs.isra.11.futex_wake.do_futex.sys_futex.system_call_fastpath
271 ± 46% +125.9% 613 ± 17% sched_debug.cpu#38.ttwu_local
327 ± 42% +155.8% 838 ± 41% sched_debug.cpu#45.ttwu_local
624 ± 25% +137.2% 1482 ± 30% sched_debug.cpu#27.sched_count
492 ± 20% +116.3% 1065 ± 12% sched_debug.cpu#38.sched_goidle
280 ± 14% +193.7% 822 ± 43% sched_debug.cpu#27.ttwu_count
1317 ± 23% +99.7% 2630 ± 12% sched_debug.cpu#38.nr_switches
146 ± 38% +34.1% 196 ± 36% sched_debug.cfs_rq[39]:/.blocked_load_avg
163 ± 37% +28.0% 209 ± 34% sched_debug.cfs_rq[39]:/.tg_load_contrib
613 ± 26% +139.4% 1469 ± 31% sched_debug.cpu#27.nr_switches
1560 ± 31% +37.3% 2142 ± 29% sched_debug.cpu#37.nr_switches
277 ± 27% +82.0% 504 ± 18% sched_debug.cpu#27.sched_goidle
15.50 ± 0% +96.1% 30.40 ± 0% perf-profile.cpu-cycles.futex_wake.do_futex.sys_futex.system_call_fastpath.syscall
1625 ± 38% -56.3% 710 ± 12% sched_debug.cpu#40.ttwu_local
1528 ± 21% +91.9% 2933 ± 35% sched_debug.cpu#45.nr_switches
1539 ± 21% +91.4% 2945 ± 35% sched_debug.cpu#45.sched_count
5876 ± 18% -35.8% 3771 ± 32% sched_debug.cpu#40.sched_count
447 ± 35% +59.3% 712 ± 22% sched_debug.cpu#44.sched_goidle
16 ± 35% -35.9% 10 ± 4% sched_debug.cpu#41.cpu_load[0]
948 ± 21% +49.5% 1418 ± 21% sched_debug.cpu#45.ttwu_count
5504 ± 5% -36.8% 3479 ± 14% sched_debug.cpu#10.sched_count
7 ± 34% +94.3% 14 ± 43% sched_debug.cpu#19.cpu_load[1]
7 ± 34% +56.8% 11 ± 17% sched_debug.cpu#19.cpu_load[3]
7 ± 34% +70.5% 12 ± 30% sched_debug.cpu#19.cpu_load[2]
15 ± 30% -33.2% 10 ± 4% sched_debug.cpu#41.cpu_load[1]
7.20 ± 0% +52.5% 10.98 ± 1% perf-profile.cpu-cycles.get_futex_key.futex_wake.do_futex.sys_futex.system_call_fastpath
3947 ± 12% -44.3% 2199 ± 44% sched_debug.cpu#14.ttwu_count
2594 ± 21% +38.4% 3591 ± 13% sched_debug.cpu#19.curr->pid
1120 ± 31% +55.1% 1738 ± 27% sched_debug.cpu#43.ttwu_count
270 ± 13% +14.0% 307 ± 8% numa-vmstat.node1.nr_mlock
270 ± 13% +14.0% 307 ± 8% numa-vmstat.node1.nr_unevictable
1082 ± 13% +13.8% 1232 ± 8% numa-meminfo.node1.Mlocked
1082 ± 13% +13.8% 1232 ± 8% numa-meminfo.node1.Unevictable
1461464 ± 17% -16.1% 1226347 ± 10% sched_debug.cfs_rq[43]:/.min_vruntime
3199 ± 1% +12.5% 3600 ± 11% sched_debug.cpu#44.curr->pid
1.27 ± 1% -19.5% 1.02 ± 1% perf-profile.cpu-cycles.drop_futex_key_refs.isra.12.do_futex.sys_futex.system_call_fastpath.syscall
37.32 ± 0% +30.8% 48.80 ± 0% perf-profile.cpu-cycles.do_futex.sys_futex.system_call_fastpath.syscall
44.13 ± 0% +23.4% 54.44 ± 0% perf-profile.cpu-cycles.system_call_fastpath.syscall
995.56 ± 0% -15.9% 836.90 ± 0% time.user_time
14 ± 28% -28.5% 10 ± 4% sched_debug.cpu#41.cpu_load[2]
13 ± 14% -23.1% 10 ± 4% sched_debug.cfs_rq[41]:/.runnable_load_avg
13 ± 16% -21.2% 10 ± 4% sched_debug.cfs_rq[41]:/.load
5.09 ± 2% -18.1% 4.17 ± 0% perf-profile.cpu-cycles.testcase
42.39 ± 0% +25.1% 53.01 ± 0% perf-profile.cpu-cycles.sys_futex.system_call_fastpath.syscall
20179 ± 46% +40.0% 28253 ± 5% sched_debug.cfs_rq[19]:/.exec_clock
12.68 ± 0% -20.3% 10.11 ± 0% perf-profile.cpu-cycles.system_call_after_swapgs.syscall
13.54 ± 0% -18.0% 11.10 ± 1% perf-profile.cpu-cycles.system_call.syscall
2.55 ± 0% -18.3% 2.08 ± 0% perf-profile.cpu-cycles.sysret_check.syscall
4928 ± 21% -32.7% 3319 ± 19% sched_debug.cpu#40.nr_switches
1547 ± 4% +12.9% 1746 ± 5% slabinfo.sock_inode_cache.num_objs
1547 ± 4% +12.9% 1746 ± 5% slabinfo.sock_inode_cache.active_objs
16.28 ± 0% -15.2% 13.80 ± 1% perf-profile.cpu-cycles.hash_futex.do_futex.sys_futex.system_call_fastpath.syscall
90590 ± 1% -12.1% 79668 ± 5% meminfo.DirectMap4k
4143 ± 5% -11.0% 3689 ± 8% sched_debug.cpu#37.curr->pid
956 ± 6% +8.3% 1036 ± 4% slabinfo.RAW.active_objs
956 ± 6% +8.3% 1036 ± 4% slabinfo.RAW.num_objs

lkp-wsx01: Westmere-EX
Memory: 128G

wsm: Westmere
Memory: 6G

lkp-snb01: Sandy Bridge-EP
Memory: 32G

lkp-sbx04: Sandy Bridge-EX
Memory: 64G

nhm4: Nehalem
Memory: 4G

lkp-g5: Westmere-EX
Memory: 2048G

ivb42: Ivytown Ivy Bridge-EP
Memory: 64G

To reproduce:

apt-get install ruby
git clone git://git.kernel.org/pub/scm/linux/kernel/git/wfg/lkp-tests.git
cd lkp-tests
bin/setup-local job.yaml # the job file attached in this email
bin/run-local job.yaml
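
The futex3 profiles are dominated by the futex_wake path and the futex4
profiles by futex_wait_setup, consistent with each iteration issuing a
single futex syscall. For illustration only, a minimal stand-in for the
futex3 hot loop might look like the program below. This is not the
will-it-scale source: the real harness additionally spawns pinned
threads/processes and samples a shared iterations counter, and the
PRIVATE flag is an assumption that matches the default-case barrier
cost visible in the profiles above.

/* Approximate stand-in for the futex3 hot loop (illustrative only). */
#include <linux/futex.h>
#include <sys/syscall.h>
#include <unistd.h>

int main(void)
{
	int futex_word = 0;
	unsigned long long iterations = 0;

	for (;;) {
		/* No waiter exists, so FUTEX_WAKE returns immediately;
		 * nearly all of the cost is the sys_futex/futex_wake
		 * kernel path, including get_futex_key_refs(). */
		syscall(SYS_futex, &futex_word, FUTEX_WAKE_PRIVATE, 1,
			NULL, NULL, 0);
		iterations++;
	}
	return 0;
}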


Disclaimer:
Results have been estimated based on internal Intel analysis and are provided
for informational purposes only. Any difference in system hardware or software
design or configuration may affect actual performance.


Thanks,
Ying Huang

---
testcase: will-it-scale
default_monitors:
  wait: pre-test
  uptime:
  iostat:
  vmstat:
  numa-numastat:
  numa-vmstat:
  numa-meminfo:
  proc-vmstat:
  proc-stat:
  meminfo:
  slabinfo:
  interrupts:
  lock_stat:
  latency_stats:
  softirqs:
  bdi_dev_mapping:
  diskstats:
  cpuidle:
  cpufreq:
  turbostat:
  sched_debug:
    interval: 10
  pmeter:
default_watchdogs:
  watch-oom:
  watchdog:
cpufreq_governor:
- performance
commit: b2776bf7149bddd1f4161f14f79520f17fc1d71d
model: Westmere
memory: 6G
nr_hdd_partitions: 1
hdd_partitions:
swap_partitions:
rootfs_partition:
perf-profile:
  freq: 800
will-it-scale:
  test:
  - futex3
testbox: wsm
tbox_group: wsm
kconfig: x86_64-rhel
enqueue_time: 2014-12-08 17:37:18.481125353 +08:00
head_commit: 6888df25fb1b446378b0ea9ea978c18f492f4ee1
base_commit: b2776bf7149bddd1f4161f14f79520f17fc1d71d
branch: linux-devel/devel-hourly-2014120811
kernel: "/kernel/x86_64-rhel/b2776bf7149bddd1f4161f14f79520f17fc1d71d/vmlinuz-3.18.0-gb2776bf"
user: lkp
queue: cyclic
rootfs: debian-x86_64.cgz
result_root: "/result/wsm/will-it-scale/performance-futex3/debian-x86_64.cgz/x86_64-rhel/b2776bf7149bddd1f4161f14f79520f17fc1d71d/0"
job_file: "/lkp/scheduled/wsm/cyclic_will-it-scale-performance-futex3-x86_64-rhel-BASE-b2776bf7149bddd1f4161f14f79520f17fc1d71d-0.yaml"
dequeue_time: 2014-12-08 18:28:43.298150793 +08:00
job_state: finished
loadavg: 9.13 4.96 2.02 1/149 5821
start_time: '1418034554'
end_time: '1418034858'
version: "/lkp/lkp/.src-20141206-060219"
echo performance > /sys/devices/system/cpu/cpu0/cpufreq/scaling_governor
echo performance > /sys/devices/system/cpu/cpu1/cpufreq/scaling_governor
echo performance > /sys/devices/system/cpu/cpu10/cpufreq/scaling_governor
echo performance > /sys/devices/system/cpu/cpu11/cpufreq/scaling_governor
echo performance > /sys/devices/system/cpu/cpu2/cpufreq/scaling_governor
echo performance > /sys/devices/system/cpu/cpu3/cpufreq/scaling_governor
echo performance > /sys/devices/system/cpu/cpu4/cpufreq/scaling_governor
echo performance > /sys/devices/system/cpu/cpu5/cpufreq/scaling_governor
echo performance > /sys/devices/system/cpu/cpu6/cpufreq/scaling_governor
echo performance > /sys/devices/system/cpu/cpu7/cpufreq/scaling_governor
echo performance > /sys/devices/system/cpu/cpu8/cpufreq/scaling_governor
echo performance > /sys/devices/system/cpu/cpu9/cpufreq/scaling_governor
./runtest.py futex3 32 1 6 9 12
_______________________________________________
LKP mailing list
LKP@xxxxxxxxxxxxxxx