Re: [PATCH net-next V2 3/3] tun: add eBPF based queue selection method

From: Michael S. Tsirkin
Date: Tue Oct 31 2017 - 12:45:52 EST


On Tue, Oct 31, 2017 at 06:32:18PM +0800, Jason Wang wrote:
> This patch introduces an eBPF based queue selection method based on
> the flow steering policy ops. Userspace could load an eBPF program
> through TUNSETSTEERINGEBPF. This gives much more flexibility compared
> to the simple but hard-coded policy in the kernel.
>
> Signed-off-by: Jason Wang <jasowang@xxxxxxxxxx>
> ---
> drivers/net/tun.c | 79 ++++++++++++++++++++++++++++++++++++++++++++-
> include/uapi/linux/if_tun.h | 2 ++
> 2 files changed, 80 insertions(+), 1 deletion(-)
>
> diff --git a/drivers/net/tun.c b/drivers/net/tun.c
> index ab109ff..4bdde21 100644
> --- a/drivers/net/tun.c
> +++ b/drivers/net/tun.c
> @@ -191,6 +191,20 @@ struct tun_steering_ops {
> u32 data);
> };
>
> +void tun_steering_xmit_nop(struct tun_struct *tun, struct sk_buff *skb)
> +{
> +}
> +
> +u32 tun_steering_pre_rx_nop(struct tun_struct *tun, struct sk_buff *skb)
> +{
> + return 0;
> +}
> +
> +void tun_steering_post_rx_nop(struct tun_struct *tun, struct tun_file *tfile,
> + u32 data)
> +{
> +}
> +
> struct tun_flow_entry {
> struct hlist_node hash_link;
> struct rcu_head rcu;
> @@ -241,6 +255,7 @@ struct tun_struct {
> u32 rx_batched;
> struct tun_pcpu_stats __percpu *pcpu_stats;
> struct bpf_prog __rcu *xdp_prog;
> + struct bpf_prog __rcu *steering_prog;
> struct tun_steering_ops *steering_ops;
> };
>
> @@ -576,6 +591,19 @@ static u16 tun_automq_select_queue(struct tun_struct *tun, struct sk_buff *skb)
> return txq;
> }
>
> +static u16 tun_ebpf_select_queue(struct tun_struct *tun, struct sk_buff *skb)
> +{
> + struct bpf_prog *prog;
> + u16 ret = 0;
> +
> + rcu_read_lock();
> + prog = rcu_dereference(tun->steering_prog);
> + if (prog)
> + ret = bpf_prog_run_clear_cb(prog, skb);
> + rcu_read_unlock();
> +
> + return ret % tun->numqueues;
> +}
> static u16 tun_select_queue(struct net_device *dev, struct sk_buff *skb,
> void *accel_priv, select_queue_fallback_t fallback)
> {
> @@ -2017,6 +2045,20 @@ static ssize_t tun_chr_read_iter(struct kiocb *iocb, struct iov_iter *to)
> return ret;
> }
>
> +static void __tun_set_steering_ebpf(struct tun_struct *tun,
> + struct bpf_prog *new)
> +{
> + struct bpf_prog *old;
> +
> + old = rtnl_dereference(tun->steering_prog);
> + rcu_assign_pointer(tun->steering_prog, new);
> +
> + if (old) {
> + synchronize_net();
> + bpf_prog_destroy(old);
> + }
> +}
> +

Is this really called under rtnl? If not, then rtnl_dereference()
is wrong. If it is, I'm not sure you can call synchronize_net()
under rtnl.

> static void tun_free_netdev(struct net_device *dev)
> {
> struct tun_struct *tun = netdev_priv(dev);
> @@ -2025,6 +2067,7 @@ static void tun_free_netdev(struct net_device *dev)
> free_percpu(tun->pcpu_stats);
> tun_flow_uninit(tun);
> security_tun_dev_free_security(tun->security);
> + __tun_set_steering_ebpf(tun, NULL);
> }
>
> static void tun_setup(struct net_device *dev)
> @@ -2159,6 +2202,13 @@ static struct tun_steering_ops tun_automq_ops = {
> .post_rx = tun_automq_post_rx,
> };
>
> +static struct tun_steering_ops tun_ebpf_ops = {
> + .select_queue = tun_ebpf_select_queue,
> + .xmit = tun_steering_xmit_nop,
> + .pre_rx = tun_steering_pre_rx_nop,
> + .post_rx = tun_steering_post_rx_nop,
> +};
> +
> static int tun_flags(struct tun_struct *tun)
> {
> return tun->flags & (TUN_FEATURES | IFF_PERSIST | IFF_TUN | IFF_TAP);
> @@ -2311,6 +2361,7 @@ static int tun_set_iff(struct net *net, struct file *file, struct ifreq *ifr)
> tun->filter_attached = false;
> tun->sndbuf = tfile->socket.sk->sk_sndbuf;
> tun->rx_batched = 0;
> + RCU_INIT_POINTER(tun->steering_prog, NULL);
>
> tun->pcpu_stats = netdev_alloc_pcpu_stats(struct tun_pcpu_stats);
> if (!tun->pcpu_stats) {
> @@ -2503,6 +2554,23 @@ static int tun_set_queue(struct file *file, struct ifreq *ifr)
> return ret;
> }
>
> +static int tun_set_steering_ebpf(struct tun_struct *tun, void __user *data)
> +{
> + struct bpf_prog *prog;
> + u32 fd;
> +
> + if (copy_from_user(&fd, data, sizeof(fd)))
> + return -EFAULT;
> +
> + prog = bpf_prog_get_type(fd, BPF_PROG_TYPE_SOCKET_FILTER);
> + if (IS_ERR(prog))
> + return PTR_ERR(prog);
> +
> + __tun_set_steering_ebpf(tun, prog);
> +
> + return 0;
> +}
> +
> static long __tun_chr_ioctl(struct file *file, unsigned int cmd,
> unsigned long arg, int ifreq_len)
> {
> @@ -2785,6 +2853,9 @@ static long __tun_chr_ioctl(struct file *file, unsigned int cmd,
> case TUN_STEERING_AUTOMQ:
> tun->steering_ops = &tun_automq_ops;
> break;
> + case TUN_STEERING_EBPF:
> + tun->steering_ops = &tun_ebpf_ops;
> + break;
> default:
> ret = -EFAULT;
> }
> @@ -2794,6 +2865,8 @@ static long __tun_chr_ioctl(struct file *file, unsigned int cmd,
> ret = 0;
> if (tun->steering_ops == &tun_automq_ops)
> steering = TUN_STEERING_AUTOMQ;
> + else if (tun->steering_ops == &tun_ebpf_ops)
> + steering = TUN_STEERING_EBPF;
> else
> BUG();
> if (copy_to_user(argp, &steering, sizeof(steering)))
> @@ -2802,11 +2875,15 @@ static long __tun_chr_ioctl(struct file *file, unsigned int cmd,
>
> case TUNGETSTEERINGFEATURES:
> ret = 0;
> - steering = TUN_STEERING_AUTOMQ;
> + steering = TUN_STEERING_AUTOMQ | TUN_STEERING_EBPF;
> if (copy_to_user(argp, &steering, sizeof(steering)))
> ret = -EFAULT;
> break;
>
> + case TUNSETSTEERINGEBPF:
> + ret = tun_set_steering_ebpf(tun, argp);
> + break;
> +
> default:
> ret = -EINVAL;
> break;
> diff --git a/include/uapi/linux/if_tun.h b/include/uapi/linux/if_tun.h
> index 109760e..927f7e4 100644
> --- a/include/uapi/linux/if_tun.h
> +++ b/include/uapi/linux/if_tun.h
> @@ -59,6 +59,7 @@
> #define TUNSETSTEERING _IOW('T', 224, unsigned int)
> #define TUNGETSTEERING _IOR('T', 225, unsigned int)
> #define TUNGETSTEERINGFEATURES _IOR('T', 226, unsigned int)
> +#define TUNSETSTEERINGEBPF _IOR('T', 227, int)
>
> /* TUNSETIFF ifr flags */
> #define IFF_TUN 0x0001
> @@ -112,5 +113,6 @@ struct tun_filter {
> };
>
> #define TUN_STEERING_AUTOMQ 0x01 /* Automatic flow steering */
> +#define TUN_STEERING_EBPF 0x02 /* eBPF based flow steering */
>
> #endif /* _UAPI__IF_TUN_H */
> --
> 2.7.4