Re: [PATCH net-next v7 4/4] eventpoll: Add epoll ioctl for epoll_params

From: Eric Dumazet
Date: Sun Feb 11 2024 - 04:41:09 EST


On Fri, Feb 9, 2024 at 10:15 PM Joe Damato <jdamato@xxxxxxxxxx> wrote:
>
> Add an ioctl for getting and setting epoll_params. User programs can use
> this ioctl to get and set the busy poll usec time, packet budget, and
> prefer busy poll params for a specific epoll context.
>
> Parameters are limited:
> - busy_poll_usecs is limited to <= s32_max
> - busy_poll_budget is limited to <= NAPI_POLL_WEIGHT by unprivileged
> users (!capable(CAP_NET_ADMIN))
> - prefer_busy_poll must be 0 or 1
> - __pad must be 0
>
> Signed-off-by: Joe Damato <jdamato@xxxxxxxxxx>
> Acked-by: Stanislav Fomichev <sdf@xxxxxxxxxx>
> Reviewed-by: Jiri Slaby <jirislaby@xxxxxxxxxx>
> ---
> .../userspace-api/ioctl/ioctl-number.rst | 1 +
> fs/eventpoll.c | 72 +++++++++++++++++++
> include/uapi/linux/eventpoll.h | 13 ++++
> 3 files changed, 86 insertions(+)
>
> diff --git a/Documentation/userspace-api/ioctl/ioctl-number.rst b/Documentation/userspace-api/ioctl/ioctl-number.rst
> index 457e16f06e04..b33918232f78 100644
> --- a/Documentation/userspace-api/ioctl/ioctl-number.rst
> +++ b/Documentation/userspace-api/ioctl/ioctl-number.rst
> @@ -309,6 +309,7 @@ Code Seq# Include File Comments
> 0x89 0B-DF linux/sockios.h
> 0x89 E0-EF linux/sockios.h SIOCPROTOPRIVATE range
> 0x89 F0-FF linux/sockios.h SIOCDEVPRIVATE range
> +0x8A 00-1F linux/eventpoll.h
> 0x8B all linux/wireless.h
> 0x8C 00-3F WiNRADiO driver
> <http://www.winradio.com.au/>
> diff --git a/fs/eventpoll.c b/fs/eventpoll.c
> index 1b8d01af0c2c..aa58d42737e6 100644
> --- a/fs/eventpoll.c
> +++ b/fs/eventpoll.c
> @@ -37,6 +37,7 @@
> #include <linux/seq_file.h>
> #include <linux/compat.h>
> #include <linux/rculist.h>
> +#include <linux/capability.h>
> #include <net/busy_poll.h>
>
> /*
> @@ -494,6 +495,49 @@ static inline void ep_set_busy_poll_napi_id(struct epitem *epi)
> ep->napi_id = napi_id;
> }
>
> +static long ep_eventpoll_bp_ioctl(struct file *file, unsigned int cmd,
> + unsigned long arg)
> +{
> + struct eventpoll *ep = file->private_data;
> + void __user *uarg = (void __user *)arg;
> + struct epoll_params epoll_params;
> +
> + switch (cmd) {
> + case EPIOCSPARAMS:
> + if (copy_from_user(&epoll_params, uarg, sizeof(epoll_params)))
> + return -EFAULT;
> +
> + /* pad byte must be zero */
> + if (epoll_params.__pad)
> + return -EINVAL;
> +
> + if (epoll_params.busy_poll_usecs > S32_MAX)
> + return -EINVAL;
> +
> + if (epoll_params.prefer_busy_poll > 1)
> + return -EINVAL;
> +
> + if (epoll_params.busy_poll_budget > NAPI_POLL_WEIGHT &&
> + !capable(CAP_NET_ADMIN))
> + return -EPERM;
> +
> + ep->busy_poll_usecs = epoll_params.busy_poll_usecs;

You need WRITE_ONCE(ep->XXX, val) for all three of these stores (busy_poll_usecs, busy_poll_budget and prefer_busy_poll).

> + ep->busy_poll_budget = epoll_params.busy_poll_budget;
> + ep->prefer_busy_poll = epoll_params.prefer_busy_poll;
> + return 0;
> + case EPIOCGPARAMS:
> + memset(&epoll_params, 0, sizeof(epoll_params));
> + epoll_params.busy_poll_usecs = ep->busy_poll_usecs;

You need to use READ_ONCE(ep->XXX) for the three reads (busy_poll_usecs, busy_poll_budget and prefer_busy_poll).


> + epoll_params.busy_poll_budget = ep->busy_poll_budget;
> + epoll_params.prefer_busy_poll = ep->prefer_busy_poll;
> + if (copy_to_user(uarg, &epoll_params, sizeof(epoll_params)))
> + return -EFAULT;
> + return 0;
> + default:
> + return -ENOIOCTLCMD;
> + }
> +}
>