[PATCH 7/7] eventpoll: add method for configuring minimum wait on epoll context

From: Jens Axboe
Date: Thu Dec 01 2022 - 13:12:41 EST


Add support for EPOLL_CTL_MIN_WAIT, which can be used to define a
minimum reap time for an epoll context.

Basic test case:

/* Pair of pipe write ends handed to the writer thread. */
struct d {
	int p1;		/* write end of the first pipe */
	int p2;		/* write end of the second pipe */
};

/*
 * Writer thread: sends one byte down each pipe so the epoll context sees
 * two separate readiness events.
 *
 * data points to a struct d holding the write ends of the two pipes.
 * Always returns NULL; a failed write is reported on stderr.
 */
static void *fn(void *data)
{
	struct d *d = data;
	char b = 0x89;

	/*
	 * Generate 2 events 10 msec apart: the first ~10 msec after the
	 * thread starts, the second ~20 msec after it starts.
	 */
	usleep(10000);
	if (write(d->p1, &b, sizeof(b)) != (ssize_t)sizeof(b))
		perror("write p1");
	usleep(10000);
	if (write(d->p2, &b, sizeof(b)) != (ssize_t)sizeof(b))
		perror("write p2");

	return NULL;
}

/*
 * Test driver: registers the read ends of two pipes with an epoll context,
 * sets a 200 msec minimum wait on that context, starts a thread that makes
 * both pipes readable, and prints how many events one epoll_wait() returns.
 *
 * Returns 0 on success, 1 if any setup step or epoll_wait() fails.
 */
int main(int argc, char *argv[])
{
	struct epoll_event ev, events[2];
	pthread_t thread;
	int p1[2], p2[2];
	struct d d;
	int efd, ret;

	efd = epoll_create1(0);
	if (efd < 0) {
		perror("epoll_create");
		return 1;
	}

	if (pipe(p1) < 0) {
		perror("pipe");
		return 1;
	}
	if (pipe(p2) < 0) {
		perror("pipe");
		return 1;
	}

	ev.events = EPOLLIN;
	ev.data.fd = p1[0];
	if (epoll_ctl(efd, EPOLL_CTL_ADD, p1[0], &ev) < 0) {
		perror("epoll add");
		return 1;
	}
	ev.events = EPOLLIN;
	ev.data.fd = p2[0];
	if (epoll_ctl(efd, EPOLL_CTL_ADD, p2[0], &ev) < 0) {
		perror("epoll add");
		return 1;
	}

	/* always wait 200 msec for events (value is given in usec) */
	ev.data.u64 = 200000;
	if (epoll_ctl(efd, EPOLL_CTL_MIN_WAIT, -1, &ev) < 0) {
		perror("epoll add set timeout");
		return 1;
	}

	d.p1 = p1[1];
	d.p2 = p2[1];
	/*
	 * pthread_create() returns an error number rather than setting errno.
	 * Without the writer thread the untimed epoll_wait() below would
	 * never return, so bail out here.
	 */
	ret = pthread_create(&thread, NULL, fn, &d);
	if (ret) {
		fprintf(stderr, "pthread_create: error %d\n", ret);
		return 1;
	}

	/* expect to get 2 events here rather than just 1 */
	ret = epoll_wait(efd, events, 2, -1);

	/* the writer is finite (two short sleeps), so joining cannot hang */
	pthread_join(thread, NULL);

	if (ret < 0) {
		perror("epoll_wait");
		return 1;
	}
	printf("epoll_wait=%d\n", ret);

	return 0;
}

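The minimum wait is passed in usec in the event's data field. If
EPOLL_CTL_MIN_WAIT is used with a timeout of 0, no minimum wait is
applied, and the context acts the same as if it wasn't called to begin
with (a previously set value is cleared). Only a non-zero usec delay
value will result in a wait time being applied for reaping events.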

Signed-off-by: Jens Axboe <axboe@xxxxxxxxx>
---
fs/eventpoll.c | 13 ++++++++++++-
include/linux/eventpoll.h | 2 +-
include/uapi/linux/eventpoll.h | 1 +
3 files changed, 14 insertions(+), 2 deletions(-)

diff --git a/fs/eventpoll.c b/fs/eventpoll.c
index daa9885d9c2b..ec7ffce8265a 100644
--- a/fs/eventpoll.c
+++ b/fs/eventpoll.c
@@ -2183,6 +2183,17 @@ int do_epoll_ctl(int epfd, int op, int fd, struct epoll_event *epds,
*/
ep = f.file->private_data;

+ /*
+ * Handle EPOLL_CTL_MIN_WAIT upfront as we don't need to care about
+ * the fd being passed in.
+ */
+ if (op == EPOLL_CTL_MIN_WAIT) {
+ /* return old value */
+ error = ep->min_wait_ts;
+ ep->min_wait_ts = epds->data;
+ goto error_fput;
+ }
+
/* Get the "struct file *" for the target file */
tf = fdget(fd);
if (!tf.file)
@@ -2315,7 +2326,7 @@ SYSCALL_DEFINE4(epoll_ctl, int, epfd, int, op, int, fd,
{
struct epoll_event epds;

- if (ep_op_has_event(op) &&
+ if ((ep_op_has_event(op) || op == EPOLL_CTL_MIN_WAIT) &&
copy_from_user(&epds, event, sizeof(struct epoll_event)))
return -EFAULT;

diff --git a/include/linux/eventpoll.h b/include/linux/eventpoll.h
index 3337745d81bd..cbef635cb7e4 100644
--- a/include/linux/eventpoll.h
+++ b/include/linux/eventpoll.h
@@ -59,7 +59,7 @@ int do_epoll_ctl(int epfd, int op, int fd, struct epoll_event *epds,
/* Tells if the epoll_ctl(2) operation needs an event copy from userspace */
static inline int ep_op_has_event(int op)
{
- return op != EPOLL_CTL_DEL;
+ return op != EPOLL_CTL_DEL && op != EPOLL_CTL_MIN_WAIT;
}

#else
diff --git a/include/uapi/linux/eventpoll.h b/include/uapi/linux/eventpoll.h
index 8a3432d0f0dc..81ecb1ca36e0 100644
--- a/include/uapi/linux/eventpoll.h
+++ b/include/uapi/linux/eventpoll.h
@@ -26,6 +26,7 @@
#define EPOLL_CTL_ADD 1
#define EPOLL_CTL_DEL 2
#define EPOLL_CTL_MOD 3
+#define EPOLL_CTL_MIN_WAIT 4

/* Epoll event masks */
#define EPOLLIN (__force __poll_t)0x00000001
--
2.35.1