Currently, the only way to ignore outgoing packets on a packet socket is
via the BPF filter.  With MSG_ZEROCOPY, packets that are looped into
AF_PACKET are copied in dev_queue_xmit_nit(), and this copy happens even
if the filter run from packet_rcv() would reject them.  So the presence
of a packet socket on the interface takes away the benefits of
MSG_ZEROCOPY, even if the packet socket is not interested in outgoing
packets.  (Even when MSG_ZEROCOPY is not used, the skb is unnecessarily
cloned, but the cost for that is much lower.)

To solve this, add a socket option that allows AF_PACKET sockets to
ignore outgoing packets.  Note that the *BSDs already have something
similar: BIOCSSEESENT/BIOCSDIRECTION and BIOCSDIRFILT.

The first intended user is lldpd.

Signed-off-by: Vincent Whitchurch <vincent.whitchu...@axis.com>
---
 include/linux/netdevice.h      |  1 +
 include/uapi/linux/if_packet.h |  1 +
 net/core/dev.c                 |  3 +++
 net/packet/af_packet.c         | 15 +++++++++++++++
 4 files changed, 20 insertions(+)

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index ca5ab98053c8..8ef14d9edc58 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -2317,6 +2317,7 @@ static inline struct sk_buff *call_gro_receive_sk(gro_receive_sk_t cb,
 
 struct packet_type {
        __be16                  type;   /* This is really htons(ether_type). */
+       bool                    ignore_outgoing;
        struct net_device       *dev;   /* NULL is wildcarded here           */
        int                     (*func) (struct sk_buff *,
                                         struct net_device *,
diff --git a/include/uapi/linux/if_packet.h b/include/uapi/linux/if_packet.h
index 67b61d91d89b..467b654bd4c7 100644
--- a/include/uapi/linux/if_packet.h
+++ b/include/uapi/linux/if_packet.h
@@ -57,6 +57,7 @@ struct sockaddr_ll {
 #define PACKET_QDISC_BYPASS            20
 #define PACKET_ROLLOVER_STATS          21
 #define PACKET_FANOUT_DATA             22
+#define PACKET_IGNORE_OUTGOING         23
 
 #define PACKET_FANOUT_HASH             0
 #define PACKET_FANOUT_LB               1
diff --git a/net/core/dev.c b/net/core/dev.c
index 325fc5088370..0addb4f0abfe 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -1947,6 +1947,9 @@ static inline bool skb_loop_sk(struct packet_type *ptype, struct sk_buff *skb)
        if (!ptype->af_packet_priv || !skb->sk)
                return false;
 
+       if (ptype->ignore_outgoing)
+               return true;
+
        if (ptype->id_match)
                return ptype->id_match(ptype, skb->sk);
        else if ((struct sock *)ptype->af_packet_priv == skb->sk)
diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c
index 5610061e7f2e..37bbafad724a 100644
--- a/net/packet/af_packet.c
+++ b/net/packet/af_packet.c
@@ -3805,6 +3805,18 @@ packet_setsockopt(struct socket *sock, int level, int optname, char __user *optv
 
                return fanout_set_data(po, optval, optlen);
        }
+       case PACKET_IGNORE_OUTGOING:
+       {
+               int val;
+
+               if (optlen != sizeof(val))
+                       return -EINVAL;
+               if (copy_from_user(&val, optval, sizeof(val)))
+                       return -EFAULT;
+
+               po->prot_hook.ignore_outgoing = !!val;
+               return 0;
+       }
        case PACKET_TX_HAS_OFF:
        {
                unsigned int val;
@@ -3928,6 +3940,9 @@ static int packet_getsockopt(struct socket *sock, int level, int optname,
                        ((u32)po->fanout->flags << 24)) :
                       0);
                break;
+       case PACKET_IGNORE_OUTGOING:
+               val = po->prot_hook.ignore_outgoing;
+               break;
        case PACKET_ROLLOVER_STATS:
                if (!po->rollover)
                        return -EINVAL;
-- 
2.11.0

Reply via email to