2.6.27.9: splice_to_pipe() hung (blocked for more than 120 seconds)

From: Vegard Nossum
Date: Fri Jan 16 2009 - 13:56:24 EST


Hi,

Seeing some recent splice() discussions, I decided to explore this
system call. I have written a program which might look, well, not very
useful, but the fact is that it creates an unkillable zombie process.
Another funny side effect is that system load continually rises, even
though the system seems to stay fully interactive and functional.

After a while, I also get some messages like this:

Jan 15 20:11:37 localhost kernel: INFO: task a.out:7149 blocked for
more than 120 seconds.
Jan 15 20:11:37 localhost kernel: "echo 0 >
/proc/sys/kernel/hung_task_timeout_secs" disables this message.
Jan 15 20:11:37 localhost kernel: a.out D ec6e2610 0 7149 1
Jan 15 20:11:37 localhost kernel: ec5aad44 00000082 c042451f
ec6e2610 00989680 c07da67c c07ddb80 c07ddb80
Jan 15 20:11:37 localhost kernel: c07ddb80 ec6e4c20 ec6e4e7c
c201db80 00000001 c201db80 470fed45 0000036b
Jan 15 20:11:37 localhost kernel: ec5aad38 c0421027 ec6e263c
ec6e4e7c ec6e3fa8 85c129f4 ec6e4c20 ec6e4c20
Jan 15 20:11:37 localhost kernel: Call Trace:
Jan 15 20:11:37 localhost kernel: [<c064420f>] __mutex_lock_common+0x8a/0xd9
Jan 15 20:11:37 localhost kernel: [<c0644302>] __mutex_lock_slowpath+0x12/0x15
Jan 15 20:11:37 localhost kernel: [<c0644181>] mutex_lock+0x29/0x2d
Jan 15 20:11:37 localhost kernel: [<c04aa8f1>] splice_to_pipe+0x23/0x1f5
Jan 15 20:11:37 localhost kernel: [<c04ab290>]
__generic_file_splice_read+0x3ff/0x413
Jan 15 20:11:37 localhost kernel: [<c04ab324>]
generic_file_splice_read+0x80/0x9a
Jan 15 20:11:37 localhost kernel: [<c04a9e95>] do_splice_to+0x4e/0x5f
Jan 15 20:11:37 localhost kernel: [<c04aa010>] sys_splice+0x16a/0x1c8
Jan 15 20:11:37 localhost kernel: [<c0403cca>] syscall_call+0x7/0xb
Jan 15 20:11:37 localhost kernel: =======================

(but this was from such a system with 6 zombies and ~80 load. See
attachments for SysRq report with processes in blocked state, it has
similar info but for just one zombie.)

This happens with 2.6.27.9-73.fc9.i686 kernel. Maybe it was fixed
recently? (In any case, I don't think it is a regression.)

It seems to be not 100% reproducible. Sometimes it works, sometimes
not. Start the program, then after a while hit Ctrl-C. If it doesn't
exit, zombie count will rise and system state will be as described.
Compile with -lpthread.


Vegard

--
"The animistic metaphor of the bug that maliciously sneaked in while
the programmer was not looking is intellectually dishonest as it
disguises that the error is the programmer's own creation."
-- E. W. Dijkstra, EWD1036
#define _GNU_SOURCE

#include <sys/socket.h>
#include <sys/types.h>

#include <fcntl.h>
#include <errno.h>
#include <pthread.h>
#include <signal.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>

static int sock_fd[2];
static int pipe_fd[2];

static void *do_file_to_pipe(void *unused)
{
int splice_len = 3;

while (1) {
char filename[80];
char command[80];
int file_fd;

sprintf(filename, "splice-%d-%d-%d", getpid(), pthread_self(), splice_len);
sprintf(command, "dmesg > %s", filename);
system(command);

file_fd = open(filename, O_RDONLY);
if (file_fd == -1)
exit(EXIT_FAILURE);

while (1) {
int n;

n = splice(file_fd, NULL, pipe_fd[1], NULL,
splice_len, 0);
if (n == -1)
exit(EXIT_FAILURE);
if (n == 0)
break;
}

close(file_fd);

sprintf(command, "rm %s", filename);
system(command);

splice_len += rand() % 512;
if (splice_len >= 16384)
splice_len -= 16384;
}

return NULL;
}

static void *do_pipe_to_sock(void *unused)
{
int splice_len = 383;

while (1) {
if (splice(pipe_fd[0], NULL, sock_fd[1], NULL,
splice_len, 0) == -1)
{
exit(EXIT_FAILURE);
}

splice_len += rand() % 512;
if (splice_len >= 16384)
splice_len -= 16384;
}

return NULL;
}

static void *do_sock_to_sock(void *unused)
{
while (1) {
unsigned int n;
char *buf;

n = rand() % 16384;
buf = malloc(n);

if (read(sock_fd[0], buf, n) == -1)
exit(EXIT_FAILURE);

free(buf);
}
}

#define N_THREADS 3

int main(int argc, char *argv[])
{
if (socketpair(AF_UNIX, SOCK_STREAM, 0, sock_fd) == -1)
exit(EXIT_FAILURE);

if (pipe(pipe_fd) == -1)
exit(EXIT_FAILURE);

pthread_t file_to_pipe_thread[N_THREADS];
pthread_t pipe_to_sock_thread[N_THREADS];
pthread_t sock_to_sock_thread[N_THREADS];

int i;
for (i = 0; i < N_THREADS; ++i) {
pthread_create(&file_to_pipe_thread[i], NULL,
&do_file_to_pipe, NULL);
pthread_create(&pipe_to_sock_thread[i], NULL,
&do_pipe_to_sock, NULL);
pthread_create(&sock_to_sock_thread[i], NULL,
&do_sock_to_sock, NULL);
}

while (1)
sleep(60);

return EXIT_SUCCESS;
}
SysRq : Show Blocked State
task PC stack pid father
a.out D c04135fb 0 24080 24010
c3425d44 00200086 00000000 c04135fb 38330002 c07da67c c07ddb80 c07ddb80
c07ddb80 f43c0cb0 f43c0f0c c201db80 00000001 c201db80 2f4164b8 00002151
c3425d38 c3589500 b803a2a4 f43c0f0c c3589500 c3425e78 c3425e93 c3425e7c
Call Trace:
[<c04135fb>] ? native_flush_tlb_others+0xbe/0xc7
[<c0506330>] ? vsnprintf+0x523/0x56e
[<c064420f>] __mutex_lock_common+0x8a/0xd9
[<c0644302>] __mutex_lock_slowpath+0x12/0x15
[<c0644181>] ? mutex_lock+0x29/0x2d
[<c0644181>] mutex_lock+0x29/0x2d
[<c04aa8f1>] splice_to_pipe+0x23/0x1f5
[<c04ab290>] __generic_file_splice_read+0x3ff/0x413
[<c049f3b1>] ? dput+0x34/0x107
[<c0495f4e>] ? path_to_nameidata+0x11/0x2a
[<c04a3041>] ? mntput_no_expire+0x1a/0xf7
[<c0495f3b>] ? mntput+0x13/0x15
[<c0496017>] ? path_put+0x15/0x18
[<c0497bfa>] ? path_walk+0x92/0x9b
[<c0497db0>] ? do_path_lookup+0x12d/0x175
[<c0490923>] ? file_move+0x36/0x3b
[<c048e6cd>] ? __dentry_open+0x131/0x1fc
[<c048e81f>] ? nameidata_to_filp+0x1f/0x33
[<c048e264>] ? generic_file_open+0x0/0x47
[<c0498340>] ? do_filp_open+0x31c/0x611
[<c04aa0a5>] ? spd_release_page+0x0/0xf
[<c04ab324>] generic_file_splice_read+0x80/0x9a
[<c04a9e95>] do_splice_to+0x4e/0x5f
[<c04aa010>] sys_splice+0x16a/0x1c8
[<c0403cca>] syscall_call+0x7/0xb
[<c064007b>] ? init_intel+0x125/0x275
=======================
a.out D c3478e04 0 24081 24010
c3478e64 00200086 c3478e04 c3478e04 00200a02 c07da67c c07ddb80 c07ddb80
c07ddb80 f43c2610 f43c286c c2012b80 00000000 c2012b80 c04276f5 00000040
c07ddb80 c3589500 c2012b80 f43c286c 022abf5d f6b22610 00f90001 00f90002
Call Trace:
[<c04276f5>] ? finish_task_switch+0x98/0xb0
[<c064420f>] __mutex_lock_common+0x8a/0xd9
[<c0644302>] __mutex_lock_slowpath+0x12/0x15
[<c0644181>] ? mutex_lock+0x29/0x2d
[<c0644181>] mutex_lock+0x29/0x2d
[<c0495555>] pipe_wait+0x69/0x70
[<c043c166>] ? autoremove_wake_function+0x0/0x33
[<c04aa616>] __splice_from_pipe+0x17c/0x1c5
[<c04a9a0d>] ? pipe_to_sendpage+0x0/0x61
[<c04aa8a2>] splice_from_pipe+0x6f/0x86
[<c04aa8cc>] generic_splice_sendpage+0x13/0x15
[<c04a9a0d>] ? pipe_to_sendpage+0x0/0x61
[<c04a9e1a>] do_splice_from+0x53/0x64
[<c04a9f7e>] sys_splice+0xd8/0x1c8
[<c0403cca>] syscall_call+0x7/0xb
[<c064007b>] ? init_intel+0x125/0x275
=======================
a.out D c382db60 0 24084 24010
c43fdd44 00200086 00000000 c382db60 383552cc c07da67c c07ddb80 c07ddb80
c07ddb80 f6b232c0 f6b2351c c201db80 00000001 c201db80 2e93a8e7 00002151
c43fdd38 c046cd1f f7406aac f6b2351c c43fdd24 c041e5b7 c43fdd2c c0643c05
Call Trace:
[<c046cd1f>] ? find_get_page+0x1d/0x81
[<c041e5b7>] ? need_resched+0x18/0x22
[<c0643c05>] ? _cond_resched+0x8/0x32
[<c046cf57>] ? lock_page+0x1a/0x2f
[<c064420f>] __mutex_lock_common+0x8a/0xd9
[<c0644302>] __mutex_lock_slowpath+0x12/0x15
[<c0644181>] ? mutex_lock+0x29/0x2d
[<c0644181>] mutex_lock+0x29/0x2d
[<c04aa8f1>] splice_to_pipe+0x23/0x1f5
[<c04ab290>] __generic_file_splice_read+0x3ff/0x413
[<c049f3b1>] ? dput+0x34/0x107
[<c0495f4e>] ? path_to_nameidata+0x11/0x2a
[<c04a3041>] ? mntput_no_expire+0x1a/0xf7
[<c0495f3b>] ? mntput+0x13/0x15
[<c0496017>] ? path_put+0x15/0x18
[<c0497bfa>] ? path_walk+0x92/0x9b
[<c0497db0>] ? do_path_lookup+0x12d/0x175
[<c0490923>] ? file_move+0x36/0x3b
[<c048e6cd>] ? __dentry_open+0x131/0x1fc
[<c048e81f>] ? nameidata_to_filp+0x1f/0x33
[<c048e264>] ? generic_file_open+0x0/0x47
[<c0498340>] ? do_filp_open+0x31c/0x611
[<c04aa0a5>] ? spd_release_page+0x0/0xf
[<c04ab324>] generic_file_splice_read+0x80/0x9a
[<c04a9e95>] do_splice_to+0x4e/0x5f
[<c04aa010>] sys_splice+0x16a/0x1c8
[<c0403cca>] syscall_call+0x7/0xb
[<c064007b>] ? init_intel+0x125/0x275
=======================
a.out D f42e4e68 0 24086 24010
f42e4ea8 00200086 00000000 f42e4e68 c041e8e3 c07da67c c07ddb80 c07ddb80
c07ddb80 f4e5bf70 f4e5c1cc c201db80 00000001 c201db80 2bb6aaab 00002151
c201dbbc 00007acc f42e4e94 f4e5c1cc f43c1960 01840000 f43c1960 c201db80
Call Trace:
[<c041e8e3>] ? update_curr+0x94/0xdc
[<c04212cf>] ? pick_next_task_fair+0x99/0xa3
[<c064420f>] __mutex_lock_common+0x8a/0xd9
[<c0644302>] __mutex_lock_slowpath+0x12/0x15
[<c0644181>] ? mutex_lock+0x29/0x2d
[<c0644181>] mutex_lock+0x29/0x2d
[<c049fb9c>] inode_double_lock+0x44/0x48
[<c04aa895>] splice_from_pipe+0x62/0x86
[<c04aa8cc>] generic_splice_sendpage+0x13/0x15
[<c04a9a0d>] ? pipe_to_sendpage+0x0/0x61
[<c04a9e1a>] do_splice_from+0x53/0x64
[<c04a9f7e>] sys_splice+0xd8/0x1c8
[<c0403cca>] syscall_call+0x7/0xb
[<c064007b>] ? init_intel+0x125/0x275
=======================
a.out D f4388cfc 0 24088 24010
f4388d44 00200086 00000000 f4388cfc 3839e289 c07da67c c07ddb80 c07ddb80
c07ddb80 c3481960 c3481bbc c201db80 00000001 c201db80 2ed24233 00002151
ffffffff c3589500 b44342a4 c3481bbc c3589500 f4388e78 f4388e93 f4388e7c
Call Trace:
[<c0506330>] ? vsnprintf+0x523/0x56e
[<c064420f>] __mutex_lock_common+0x8a/0xd9
[<c0644302>] __mutex_lock_slowpath+0x12/0x15
[<c0644181>] ? mutex_lock+0x29/0x2d
[<c0644181>] mutex_lock+0x29/0x2d
[<c04aa8f1>] splice_to_pipe+0x23/0x1f5
[<c04ab290>] __generic_file_splice_read+0x3ff/0x413
[<c049f3b1>] ? dput+0x34/0x107
[<c0495f4e>] ? path_to_nameidata+0x11/0x2a
[<c04a3041>] ? mntput_no_expire+0x1a/0xf7
[<c0495f3b>] ? mntput+0x13/0x15
[<c0496017>] ? path_put+0x15/0x18
[<c0497bfa>] ? path_walk+0x92/0x9b
[<c0497db0>] ? do_path_lookup+0x12d/0x175
[<c0490923>] ? file_move+0x36/0x3b
[<c048e6cd>] ? __dentry_open+0x131/0x1fc
[<c048e81f>] ? nameidata_to_filp+0x1f/0x33
[<c048e264>] ? generic_file_open+0x0/0x47
[<c0498340>] ? do_filp_open+0x31c/0x611
[<c04aa0a5>] ? spd_release_page+0x0/0xf
[<c04ab324>] generic_file_splice_read+0x80/0x9a
[<c04a9e95>] do_splice_to+0x4e/0x5f
[<c04aa010>] sys_splice+0x16a/0x1c8
[<c0403cca>] syscall_call+0x7/0xb
[<c064007b>] ? init_intel+0x125/0x275
=======================
a.out D c34f0e68 0 24090 24010
c34f0ea8 00200086 00000000 c34f0e68 c041e8e3 c07da67c c07ddb80 c07ddb80
c07ddb80 f4d8cc20 f4d8ce7c c201db80 00000001 c201db80 2bdfe908 00002151
c201dbbc 00b85f4c c34f0e94 f4d8ce7c c3484c20 01840000 c3484c20 c201db80
Call Trace:
[<c041e8e3>] ? update_curr+0x94/0xdc
[<c04212cf>] ? pick_next_task_fair+0x99/0xa3
[<c064420f>] __mutex_lock_common+0x8a/0xd9
[<c0644302>] __mutex_lock_slowpath+0x12/0x15
[<c0644181>] ? mutex_lock+0x29/0x2d
[<c0644181>] mutex_lock+0x29/0x2d
[<c049fb95>] inode_double_lock+0x3d/0x48
[<c04aa895>] splice_from_pipe+0x62/0x86
[<c04aa8cc>] generic_splice_sendpage+0x13/0x15
[<c04a9a0d>] ? pipe_to_sendpage+0x0/0x61
[<c04a9e1a>] do_splice_from+0x53/0x64
[<c04a9f7e>] sys_splice+0xd8/0x1c8
[<c0403cca>] syscall_call+0x7/0xb
[<c064007b>] ? init_intel+0x125/0x275
=======================
Sched Debug Version: v0.07, 2.6.27.9-73.fc9.i686 #1
now at 69957132.969319 msecs
.sysctl_sched_latency : 40.000000
.sysctl_sched_min_granularity : 8.000000
.sysctl_sched_wakeup_granularity : 10.000000
.sysctl_sched_child_runs_first : 0.000001
.sysctl_sched_features : 15999

cpu#0, 1462.986 MHz
.nr_running : 2
.load : 2048
.nr_switches : 31810173
.nr_load_updates : 15616636
.nr_uninterruptible : 13815
.jiffies : 69657131
.next_balance : 69.657382
.curr->pid : 30879
.clock : 69957131.018861
.cpu_load[0] : 2048
.cpu_load[1] : 1284
.cpu_load[2] : 745
.cpu_load[3] : 454
.cpu_load[4] : 301

cfs_rq[0]:
.exec_clock : 5860957.949602
.MIN_vruntime : 6558549.587243
.min_vruntime : 6558589.286460
.max_vruntime : 6558549.587243
.spread : 0.000000
.spread0 : 0.000000
.nr_running : 2
.load : 2048
.yld_exp_empty : 0
.yld_act_empty : 0
.yld_both_empty : 0
.yld_count : 14080
.sched_switch : 0
.sched_count : 39326166
.sched_goidle : 15138799
.ttwu_count : 18079305
.ttwu_local : 15504357
.bkl_count : 184463
.nr_spread_over : 5186117

rt_rq[0]:
.rt_nr_running : 0
.rt_throttled : 0
.rt_time : 0.000000
.rt_runtime : 950.000000

runnable tasks:
task PID tree-key switches prio exec-runtime sum-exec sum-sleep
----------------------------------------------------------------------------------------------------------
Xorg 2523 6558549.587243 3255230 120 6558549.587243 603389.950311 68802078.654856
R bash 30879 6558544.000849 240 120 6558544.000849 137.244077 33240434.061067

cpu#1, 1462.986 MHz
.nr_running : 1
.load : 1024
.nr_switches : 48072666
.nr_load_updates : 17440854
.nr_uninterruptible : 4294953487
.jiffies : 69657131
.next_balance : 69.657194
.curr->pid : 1894
.clock : 69957133.263761
.cpu_load[0] : 1024
.cpu_load[1] : 936
.cpu_load[2] : 784
.cpu_load[3] : 681
.cpu_load[4] : 633

cfs_rq[1]:
.exec_clock : 12161500.468689
.MIN_vruntime : 0.000001
.min_vruntime : 11413893.898667
.max_vruntime : 0.000001
.spread : 0.000000
.spread0 : 4855304.612207
.nr_running : 1
.load : 1024
.yld_exp_empty : 0
.yld_act_empty : 0
.yld_both_empty : 0
.yld_count : 9202
.sched_switch : 0
.sched_count : 49124758
.sched_goidle : 17009764
.ttwu_count : 26222118
.ttwu_local : 24223831
.bkl_count : 100677
.nr_spread_over : 4048979

rt_rq[1]:
.rt_nr_running : 0
.rt_throttled : 0
.rt_time : 0.000000
.rt_runtime : 950.000000

runnable tasks:
task PID tree-key switches prio exec-runtime sum-exec sum-sleep
----------------------------------------------------------------------------------------------------------
R rsyslogd 1894 11413855.951887 3980 120 11413855.951887 582.158037 69919323.934143