lioevent:

  This patch adds a POSIX listio completion notification event.

  This works by adding an IOCB_CMD_EVENT command. As part of listio
  submission, the user may choose to create an empty aiocb with an
  aio_lio_opcode of IOCB_CMD_EVENT and fill in only the aio_sigevent
  fields. Upon list completion, the kernel notifies the application
  using those sigevent parameters.

 Makefile                |    2
 fs/aio.c                |  155 ++++++++++++++++++++++++++++++++++++++++++++++--
 include/linux/aio.h     |   14 +++-
 include/linux/aio_abi.h |    1
 4 files changed, 164 insertions(+), 8 deletions(-)

Signed-off-by: Sébastien Dugué

Index: linux-2.6.12/fs/aio.c
===================================================================
--- linux-2.6.12.orig/fs/aio.c	2005-07-08 14:38:39.000000000 +0200
+++ linux-2.6.12/fs/aio.c	2005-07-21 11:43:17.000000000 +0200
@@ -410,6 +410,7 @@
 	req->ki_signo = 0;
 	req->ki_notify = 0;
 	req->ki_value = 0;
+	req->ki_lio_event = NULL;
 	req->private = NULL;
 
 	INIT_LIST_HEAD(&req->ki_run_list);
@@ -953,6 +954,101 @@
 	read_unlock(&tasklist_lock);
 }
 
+static inline void check_lio(struct kioctx *ctx,
+			     struct lio_event *lio)
+{
+	int ret;
+
+	if (lio == NULL)
+		return;
+
+	ret = atomic_dec_and_test(&(lio->lio_users));
+
+	if (lio->lio_signo) {
+
+		if (likely(ret)) {
+
+			/* last one -> notify process */
+			__aio_send_signal(lio->lio_pid,
+					  lio->lio_signo,
+					  lio->lio_notify,
+					  (void*)(unsigned long)lio->lio_value);
+
+			/* free memory */
+
+			kfree(lio);
+		}
+
+	}
+}
+
+static int create_lio_event(struct kioctx *ctx,struct lio_event **lio,
+			    pid_t pid, int signo, void *value)
+{
+	int notify;
+
+	if (pid == 0) {
+		/* notify itself */
+
+		pid = current->pid;
+		notify = IO_NOTIFY_SIGNAL;
+	} else {
+		pid_t group_id;
+		task_t *ptask;
+
+		/* notify given thread */
+
+		/* caller thread and target thread must be in same
+		 * thread group
+		 */
+
+		read_lock(&tasklist_lock);
+		ptask = find_task_by_pid(pid);
+
+		if (unlikely (ptask == NULL)) {
+			read_unlock(&tasklist_lock);
+			return -EFAULT;
+		}
+
+		group_id = ptask->tgid;
+		read_unlock(&tasklist_lock);
+
+		if (group_id != current->tgid)
+			return -EINVAL;
+
+		notify = IO_NOTIFY_THREAD_ID;
+	}
+
+	/* we break an existing sequence (lio exists)
+	 * close the sequence
+	 */
+
+	check_lio(ctx, *lio);
+
+	if (signo == 0) {
+
+		/* it means we don't want to monitor following commands
+		 * useful to break current sequence
+		 */
+		*lio = NULL;
+
+		return 0;
+	}
+
+
+	*lio = kmalloc(sizeof(**lio), GFP_KERNEL);
+	if (!*lio)
+		return -EAGAIN;
+
+	atomic_set(&((*lio)->lio_users), 1);
+	(*lio)->lio_pid = pid;
+	(*lio)->lio_signo = signo;
+	(*lio)->lio_notify = notify;
+	(*lio)->lio_value = (__u64)(unsigned long)value;
+
+	return 0;
+}
+
 static void __aio_write_evt(struct kioctx *ctx, struct io_event *event)
 {
 	struct aio_ring_info *info;
@@ -1057,6 +1153,8 @@
 		__aio_send_signal(iocb->ki_pid, iocb->ki_signo, iocb->ki_notify,
 				  (void*)(unsigned long)iocb->ki_value);
 
+	check_lio(ctx, iocb->ki_lio_event);
+
 	pr_debug("%ld retries: %d of %d\n", iocb->ki_retried,
 		iocb->ki_nbytes - iocb->ki_left, iocb->ki_nbytes);
 put_rq:
@@ -1440,6 +1538,7 @@
 
 	if (file->f_op->aio_fsync)
 		ret = file->f_op->aio_fsync(iocb, 0);
+
 	return ret;
 }
 
@@ -1528,7 +1627,7 @@
 }
 
 int fastcall io_submit_one(struct kioctx *ctx, struct iocb __user *user_iocb,
-			 struct iocb *iocb)
+			 struct iocb *iocb, struct lio_event *lio)
 {
 	struct kiocb *req;
 	struct file *file;
@@ -1650,6 +1749,7 @@
 		req->ki_notify = notify;
 		req->ki_value = aio_value;
 	}
+	req->ki_lio_event = lio;
 
 	req->ki_buf = (char __user *)(unsigned long)iocb->aio_buf;
 	req->ki_left = req->ki_nbytes = iocb->aio_nbytes;
@@ -1697,6 +1797,7 @@
 asmlinkage long sys_io_submit(aio_context_t ctx_id, long nr,
 			      struct iocb __user * __user *iocbpp)
 {
+	struct lio_event *lio;
 	struct kioctx *ctx;
 	long ret = 0;
 	int i;
@@ -1717,13 +1818,15 @@
 	 * AKPM: should this return a partial result if some of the IOs were
 	 * successfully submitted?
 	 */
+
+	lio = NULL;
 	for (i=0; isigev_value.sival_int))) {
 				ret = -EFAULT;
 				break;
 			}
 		}
 
-		ret = io_submit_one(ctx, user_iocb, &tmp);
-		if (ret)
-			break;
+		if (tmp.aio_lio_opcode == IOCB_CMD_EVENT) {
+
+			/* this command means that all following IO commands
+			 * are in the same group and when the group becomes
+			 * empty (all requests have been processed)
+			 * we must send a signal to a given process or thread
+			 */
+
+			ret = create_lio_event(ctx, &lio,
+					       aio_pid, aio_signo,
+					       (void *)aio_value);
+			if (ret)
+				break;
+
+			continue;
+		}
+
+		if (lio && ((tmp.aio_lio_opcode == IOCB_CMD_PREAD) ||
+			    (tmp.aio_lio_opcode == IOCB_CMD_PWRITE)) ) {
+
+			atomic_inc(&lio->lio_users);
+			ret = io_submit_one(ctx, user_iocb, &tmp, lio);
+
+			/*
+			 * If a request failed, just decrement the users count,
+			 * but go on submitting subsequent requests.
+			 *
+			 */
+			if (ret)
+				atomic_dec(&lio->lio_users);
+
+			continue;
+
+		} else {
+			ret = io_submit_one(ctx, user_iocb, &tmp, NULL);
+			if (ret)
+				break;
+		}
 	}
 
+	check_lio(ctx, lio);
 	put_ioctx(ctx);
 	return i ? i : ret;
Index: linux-2.6.12/include/linux/aio_abi.h
===================================================================
--- linux-2.6.12.orig/include/linux/aio_abi.h	2005-07-08 14:38:39.000000000 +0200
+++ linux-2.6.12/include/linux/aio_abi.h	2005-07-21 11:42:56.000000000 +0200
@@ -41,6 +41,7 @@
 	 * IOCB_CMD_POLL = 5,
 	 */
 	IOCB_CMD_NOOP = 6,
+	IOCB_CMD_EVENT = 7,
 };
 
 /* read() from /dev/aio returns these structures. */
Index: linux-2.6.12/include/linux/aio.h
===================================================================
--- linux-2.6.12.orig/include/linux/aio.h	2005-07-08 14:38:39.000000000 +0200
+++ linux-2.6.12/include/linux/aio.h	2005-07-21 11:30:02.000000000 +0200
@@ -48,6 +48,14 @@
 #define kiocbIsKicked(iocb)	test_bit(KIF_KICKED, &(iocb)->ki_flags)
 #define kiocbIsCancelled(iocb)	test_bit(KIF_CANCELLED, &(iocb)->ki_flags)
 
+struct lio_event {
+	atomic_t lio_users;
+	__s32 lio_pid;
+	__u16 lio_signo;
+	__u16 lio_notify;
+	__u64 lio_value;
+};
+
 struct kiocb {
 	struct list_head ki_run_list;
 	long ki_flags;
@@ -83,6 +91,7 @@
 	__u16 ki_signo;
 	__u16 ki_notify;
 	__u64 ki_value;
+	struct lio_event *ki_lio_event;
 	void *private;
 };
 
@@ -176,12 +185,13 @@
 extern void FASTCALL(exit_aio(struct mm_struct *mm));
 extern struct kioctx *lookup_ioctx(unsigned long ctx_id);
 extern int FASTCALL(io_submit_one(struct kioctx *ctx,
-			struct iocb __user *user_iocb, struct iocb *iocb));
+			struct iocb __user *user_iocb, struct iocb *iocb,
+			struct lio_event *lio));
 
 /* semi private, but used by the 32bit emulations: */
 struct kioctx *lookup_ioctx(unsigned long ctx_id);
 int FASTCALL(io_submit_one(struct kioctx *ctx, struct iocb __user *user_iocb,
-				  struct iocb *iocb));
+				  struct iocb *iocb, struct lio_event *lio));
 
 #define get_ioctx(kioctx) do { if (unlikely(atomic_read(&(kioctx)->users) <= 0)) BUG(); atomic_inc(&(kioctx)->users); } while (0)
 #define put_ioctx(kioctx) do { if (unlikely(atomic_dec_and_test(&(kioctx)->users))) __put_ioctx(kioctx); else if (unlikely(atomic_read(&(kioctx)->users) < 0)) BUG(); } while (0)
Index: linux-2.6.12/Makefile
===================================================================
--- linux-2.6.12.orig/Makefile	2005-07-08 14:38:39.000000000 +0200
+++ linux-2.6.12/Makefile	2005-07-21 11:42:56.000000000 +0200
@@ -1,7 +1,7 @@
 VERSION = 2
 PATCHLEVEL = 6
 SUBLEVEL = 12
-EXTRAVERSION = .PAIO-aioevent
+EXTRAVERSION = .PAIO-lioevent
 NAME=Woozy Numbat
 
 # *DOCUMENTATION*