On Fri, Jan 5, 2018 at 4:54 AM, Jason Wang <jasow...@redhat.com> wrote:
> This patch allows userspace to attach eBPF filter to tun. This will
> allow to implement VM dataplane filtering in a more efficient way
> compared to cBPF filter by allowing either qemu or libvirt to
> attach eBPF filter to tun.
>
> Signed-off-by: Jason Wang <jasow...@redhat.com>
> ---
>  drivers/net/tun.c           | 39 +++++++++++++++++++++++++++++++++++----
>  include/uapi/linux/if_tun.h |  1 +
>  2 files changed, 36 insertions(+), 4 deletions(-)
>
> diff --git a/drivers/net/tun.c b/drivers/net/tun.c
> index 0853829..9fc8b70 100644
> --- a/drivers/net/tun.c
> +++ b/drivers/net/tun.c
> @@ -238,6 +238,12 @@ struct tun_struct {
>  	struct tun_pcpu_stats __percpu *pcpu_stats;
>  	struct bpf_prog __rcu *xdp_prog;
>  	struct tun_prog __rcu *steering_prog;
> +	struct tun_prog __rcu *filter_prog;
> +};
> +
> +struct veth {
> +	__be16 h_vlan_proto;
> +	__be16 h_vlan_TCI;
>  };
>
>  static int tun_napi_receive(struct napi_struct *napi, int budget)
> @@ -984,12 +990,25 @@ static void tun_automq_xmit(struct tun_struct *tun, struct sk_buff *skb)
>  #endif
>  }
>
> +static unsigned int run_ebpf_filter(struct tun_struct *tun,
> +				    struct sk_buff *skb,
> +				    int len)
> +{
> +	struct tun_prog *prog = rcu_dereference(tun->filter_prog);
> +
> +	if (prog)
> +		len = bpf_prog_run_clear_cb(prog->prog, skb);
> +
> +	return len;
> +}
> +
>  /* Net device start xmit */
>  static netdev_tx_t tun_net_xmit(struct sk_buff *skb, struct net_device *dev)
>  {
>  	struct tun_struct *tun = netdev_priv(dev);
>  	int txq = skb->queue_mapping;
>  	struct tun_file *tfile;
> +	int len = skb->len;
>
>  	rcu_read_lock();
>  	tfile = rcu_dereference(tun->tfiles[txq]);
> @@ -1015,6 +1034,16 @@ static netdev_tx_t tun_net_xmit(struct sk_buff *skb, struct net_device *dev)
>  	    sk_filter(tfile->socket.sk, skb))
>  		goto drop;
> +	len = run_ebpf_filter(tun, skb, len);
> +
> +	/* Trim extra bytes since we may inster vlan proto & TCI
inster -> insert

> +	 * in tun_put_user().
> +	 */
> +	if (skb_vlan_tag_present(skb))
> +		len -= skb_vlan_tag_present(skb) ? sizeof(struct veth) : 0;

No need to test skb_vlan_tag_present twice.

More importantly, why trim these bytes unconditionally? Only if the
filter trims a packet to a length shorter than the minimum could this
cause problems. sk_filter_trim_cap with a lower bound avoids that:

  skb_vlan_tag_present(skb) ? sizeof(struct vlan_ethhdr) : 0;
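
For illustration only, an untested sketch of what that could look like if
the clamp is folded into run_ebpf_filter() from this patch.
sk_filter_trim_cap() itself only covers sk_filter-style socket filters, so
the bound is open-coded here on the program's return value;
skb_vlan_tag_present() and struct vlan_ethhdr come from <linux/if_vlan.h>,
and the particular lower bound is an assumption, not something the patch
defines:

  static unsigned int run_ebpf_filter(struct tun_struct *tun,
                                      struct sk_buff *skb,
                                      int len)
  {
          struct tun_prog *prog = rcu_dereference(tun->filter_prog);
          unsigned int min_len;

          if (prog) {
                  /* Never report a length shorter than a full
                   * VLAN-tagged ethernet header when tun_put_user()
                   * will re-insert the vlan proto & TCI.
                   */
                  min_len = skb_vlan_tag_present(skb) ?
                            sizeof(struct vlan_ethhdr) : 0;
                  len = max_t(unsigned int,
                              bpf_prog_run_clear_cb(prog->prog, skb),
                              min_len);
          }

          return len;
  }

The clamp only takes effect when the program returns a length shorter than
the minimum, which is the only case where the unconditional trim in
tun_net_xmit() could cause problems.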