[PATCH] splice: fix deadlock

From: Vegard Nossum
Date: Sun Jan 18 2009 - 10:07:29 EST


splice from a file or socket needs to lock both the file/socket and the
pipe inodes. pipe_wait() unlocks only the pipe lock, regardless of the
order in which they were taken.

This patch adds an additional (optional) argument to pipe_wait(); if
specified, pipe_wait() will unlock and reacquire both inode locks.

It fixes the lockup that occurs with the test-case below, but please
review carefully. Consider this patch a proposal rather than a fix.

#define _GNU_SOURCE

#include <sys/socket.h>
#include <sys/types.h>

#include <fcntl.h>
#include <errno.h>
#include <pthread.h>
#include <signal.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>

static int sock_fd[2];
static int pipe_fd[2];

#define N 16384

static void *do_write(void *unused)
{
unsigned int i;

for (i = 0; i < N; ++i)
write(pipe_fd[1], "x", 1);

return NULL;
}

static void *do_read(void *unused)
{
unsigned int i;
char c;

for (i = 0; i < N; ++i)
read(sock_fd[0], &c, 1);

return NULL;
}

static void *do_splice(void *unused)
{
unsigned int i;

for (i = 0; i < N; ++i)
splice(pipe_fd[0], NULL, sock_fd[1], NULL, 1, 0);

return NULL;
}

int main(int argc, char *argv[])
{
pthread_t writer;
pthread_t reader;
pthread_t splicer[2];

while (1) {
if (socketpair(AF_UNIX, SOCK_STREAM, 0, sock_fd) == -1)
exit(EXIT_FAILURE);

if (pipe(pipe_fd) == -1)
exit(EXIT_FAILURE);

pthread_create(&writer, NULL, &do_write, NULL);
pthread_create(&reader, NULL, &do_read, NULL);
pthread_create(&splicer[0], NULL, &do_splice, NULL);
pthread_create(&splicer[1], NULL, &do_splice, NULL);

pthread_join(writer, NULL);
pthread_join(reader, NULL);
pthread_join(splicer[0], NULL);
pthread_join(splicer[1], NULL);

printf("failed to deadlock, retrying...\n");
}

return EXIT_SUCCESS;
}

Cc: Jens Axboe <axboe@xxxxxxx>
Cc: Eric Dumazet <dada1@xxxxxxxxxxxxx>
Cc: Al Viro <viro@xxxxxxxxxxxxxxxxxx>
Signed-off-by: Vegard Nossum <vegard.nossum@xxxxxxxxx>
---
fs/fifo.c | 2 +-
fs/pipe.c | 19 +++++++++++--------
fs/splice.c | 8 ++++----
include/linux/pipe_fs_i.h | 4 ++--
4 files changed, 18 insertions(+), 15 deletions(-)

diff --git a/fs/fifo.c b/fs/fifo.c
index f8f97b8..bd2bfc6 100644
--- a/fs/fifo.c
+++ b/fs/fifo.c
@@ -20,7 +20,7 @@ static void wait_for_partner(struct inode* inode, unsigned int *cnt)
int cur = *cnt;

while (cur == *cnt) {
- pipe_wait(inode->i_pipe);
+ pipe_wait(inode->i_pipe, NULL);
if (signal_pending(current))
break;
}
diff --git a/fs/pipe.c b/fs/pipe.c
index 3a48ba5..eb7b893 100644
--- a/fs/pipe.c
+++ b/fs/pipe.c
@@ -37,8 +37,13 @@
* -- Manfred Spraul <manfred@xxxxxxxxxxxxxxxx> 2002-05-09
*/

-/* Drop the inode semaphore and wait for a pipe event, atomically */
-void pipe_wait(struct pipe_inode_info *pipe)
+/*
+ * Drop the inode semaphore and wait for a pipe event, atomically.
+ *
+ * inode2 specifies another inode lock to drop. May be NULL if no such other
+ * inode exists.
+ */
+void pipe_wait(struct pipe_inode_info *pipe, struct inode *inode2)
{
DEFINE_WAIT(wait);

@@ -47,12 +52,10 @@ void pipe_wait(struct pipe_inode_info *pipe)
* is considered a noninteractive wait:
*/
prepare_to_wait(&pipe->wait, &wait, TASK_INTERRUPTIBLE);
- if (pipe->inode)
- mutex_unlock(&pipe->inode->i_mutex);
+ inode_double_unlock(pipe->inode, inode2);
schedule();
finish_wait(&pipe->wait, &wait);
- if (pipe->inode)
- mutex_lock(&pipe->inode->i_mutex);
+ inode_double_lock(pipe->inode, inode2);
}

static int
@@ -377,7 +380,7 @@ redo:
wake_up_interruptible_sync(&pipe->wait);
kill_fasync(&pipe->fasync_writers, SIGIO, POLL_OUT);
}
- pipe_wait(pipe);
+ pipe_wait(pipe, NULL);
}
mutex_unlock(&inode->i_mutex);

@@ -550,7 +553,7 @@ redo2:
do_wakeup = 0;
}
pipe->waiting_writers++;
- pipe_wait(pipe);
+ pipe_wait(pipe, NULL);
pipe->waiting_writers--;
}
out:
diff --git a/fs/splice.c b/fs/splice.c
index 4ed0ba4..e79e906 100644
--- a/fs/splice.c
+++ b/fs/splice.c
@@ -240,7 +240,7 @@ ssize_t splice_to_pipe(struct pipe_inode_info *pipe,
}

pipe->waiting_writers++;
- pipe_wait(pipe);
+ pipe_wait(pipe, NULL);
pipe->waiting_writers--;
}

@@ -690,7 +690,7 @@ ssize_t __splice_from_pipe(struct pipe_inode_info *pipe, struct splice_desc *sd,
do_wakeup = 0;
}

- pipe_wait(pipe);
+ pipe_wait(pipe, sd->u.file->f_mapping->host);
}

if (do_wakeup) {
@@ -1523,7 +1523,7 @@ static int link_ipipe_prep(struct pipe_inode_info *pipe, unsigned int flags)
break;
}
}
- pipe_wait(pipe);
+ pipe_wait(pipe, NULL);
}

mutex_unlock(&pipe->inode->i_mutex);
@@ -1563,7 +1563,7 @@ static int link_opipe_prep(struct pipe_inode_info *pipe, unsigned int flags)
break;
}
pipe->waiting_writers++;
- pipe_wait(pipe);
+ pipe_wait(pipe, NULL);
pipe->waiting_writers--;
}

diff --git a/include/linux/pipe_fs_i.h b/include/linux/pipe_fs_i.h
index 8e41202..be2d399 100644
--- a/include/linux/pipe_fs_i.h
+++ b/include/linux/pipe_fs_i.h
@@ -134,8 +134,8 @@ struct pipe_buf_operations {
memory allocation, whereas PIPE_BUF makes atomicity guarantees. */
#define PIPE_SIZE PAGE_SIZE

-/* Drop the inode semaphore and wait for a pipe event, atomically */
-void pipe_wait(struct pipe_inode_info *pipe);
+/* Drop the inode semaphore(s) and wait for a pipe event, atomically */
+void pipe_wait(struct pipe_inode_info *pipe, struct inode *inode2);

struct pipe_inode_info * alloc_pipe_info(struct inode * inode);
void free_pipe_info(struct inode * inode);
--
1.5.6.6

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/