Allow users to set the IPv4 DF bit in outgoing packets, or to inherit its
value from the IPv4 inner header. If the encapsulated protocol is IPv6 and
DF is configured to be inherited, always set it.

For IPv4, inheriting DF from the inner header was probably intended from
the very beginning judging by the comment to vxlan_xmit(), but it wasn't
actually implemented -- also because it would have done more harm than
good, without handling for ICMP Fragmentation Needed messages.

According to RFC 7348, "Path MTU discovery MAY be used". An expired RFC
draft, draft-saum-nvo3-pmtud-over-vxlan-05, whose purpose was to describe
PMTUD implementation, says that "is a MUST that Vxlan gateways [...]
SHOULD set the DF-bit [...]", whatever that means.

Given this background, the only sane option is probably to let the user
decide, and keep the current behaviour as default.

Reviewed-by: Sabrina Dubroca <s...@queasysnail.net>
Signed-off-by: Stefano Brivio <sbri...@redhat.com>
---
 drivers/net/vxlan.c          | 29 +++++++++++++++++++++++++++++
 include/net/vxlan.h          |  1 +
 include/uapi/linux/if_link.h |  9 +++++++++
 3 files changed, 39 insertions(+)

diff --git a/drivers/net/vxlan.c b/drivers/net/vxlan.c
index 7706e392b2a7..ccb19b833706 100644
--- a/drivers/net/vxlan.c
+++ b/drivers/net/vxlan.c
@@ -2289,6 +2289,19 @@ static void vxlan_xmit_one(struct sk_buff *skb, struct 
net_device *dev,
                        df = htons(IP_DF);
                }
 
+               if (!df) {
+                       if (vxlan->cfg.df == VXLAN_DF_SET) {
+                               df = htons(IP_DF);
+                       } else if (vxlan->cfg.df == VXLAN_DF_INHERIT) {
+                               struct ethhdr *eth = eth_hdr(skb);
+
+                               if (ntohs(eth->h_proto) == ETH_P_IPV6 ||
+                                   (ntohs(eth->h_proto) == ETH_P_IP &&
+                                    old_iph->frag_off & htons(IP_DF)))
+                                       df = htons(IP_DF);
+                       }
+               }
+
                ndst = &rt->dst;
                skb_tunnel_check_pmtu(skb, ndst, VXLAN_HEADROOM);
 
@@ -2837,6 +2850,7 @@ static const struct nla_policy 
vxlan_policy[IFLA_VXLAN_MAX + 1] = {
        [IFLA_VXLAN_GPE]        = { .type = NLA_FLAG, },
        [IFLA_VXLAN_REMCSUM_NOPARTIAL]  = { .type = NLA_FLAG },
        [IFLA_VXLAN_TTL_INHERIT]        = { .type = NLA_FLAG },
+       [IFLA_VXLAN_DF]         = { .type = NLA_U8 },
 };
 
 static int vxlan_validate(struct nlattr *tb[], struct nlattr *data[],
@@ -2893,6 +2907,16 @@ static int vxlan_validate(struct nlattr *tb[], struct 
nlattr *data[],
                }
        }
 
+       if (data[IFLA_VXLAN_DF]) {
+               enum ifla_vxlan_df df = nla_get_u8(data[IFLA_VXLAN_DF]);
+
+               if (df < 0 || df > VXLAN_DF_MAX) {
+                       NL_SET_ERR_MSG_ATTR(extack, tb[IFLA_VXLAN_DF],
+                                           "Invalid DF attribute");
+                       return -EINVAL;
+               }
+       }
+
        return 0;
 }
 
@@ -3538,6 +3562,9 @@ static int vxlan_nl2conf(struct nlattr *tb[], struct 
nlattr *data[],
                conf->mtu = nla_get_u32(tb[IFLA_MTU]);
        }
 
+       if (data[IFLA_VXLAN_DF])
+               conf->df = nla_get_u8(data[IFLA_VXLAN_DF]);
+
        return 0;
 }
 
@@ -3630,6 +3657,7 @@ static size_t vxlan_get_size(const struct net_device *dev)
                nla_total_size(sizeof(__u8)) +  /* IFLA_VXLAN_TTL */
                nla_total_size(sizeof(__u8)) +  /* IFLA_VXLAN_TTL_INHERIT */
                nla_total_size(sizeof(__u8)) +  /* IFLA_VXLAN_TOS */
+               nla_total_size(sizeof(__u8)) +  /* IFLA_VXLAN_DF */
                nla_total_size(sizeof(__be32)) + /* IFLA_VXLAN_LABEL */
                nla_total_size(sizeof(__u8)) +  /* IFLA_VXLAN_LEARNING */
                nla_total_size(sizeof(__u8)) +  /* IFLA_VXLAN_PROXY */
@@ -3696,6 +3724,7 @@ static int vxlan_fill_info(struct sk_buff *skb, const 
struct net_device *dev)
            nla_put_u8(skb, IFLA_VXLAN_TTL_INHERIT,
                       !!(vxlan->cfg.flags & VXLAN_F_TTL_INHERIT)) ||
            nla_put_u8(skb, IFLA_VXLAN_TOS, vxlan->cfg.tos) ||
+           nla_put_u8(skb, IFLA_VXLAN_DF, vxlan->cfg.df) ||
            nla_put_be32(skb, IFLA_VXLAN_LABEL, vxlan->cfg.label) ||
            nla_put_u8(skb, IFLA_VXLAN_LEARNING,
                        !!(vxlan->cfg.flags & VXLAN_F_LEARN)) ||
diff --git a/include/net/vxlan.h b/include/net/vxlan.h
index 03431c148e16..ec999c49df1f 100644
--- a/include/net/vxlan.h
+++ b/include/net/vxlan.h
@@ -216,6 +216,7 @@ struct vxlan_config {
        unsigned long           age_interval;
        unsigned int            addrmax;
        bool                    no_share;
+       enum ifla_vxlan_df      df;
 };
 
 struct vxlan_dev_node {
diff --git a/include/uapi/linux/if_link.h b/include/uapi/linux/if_link.h
index 1debfa42cba1..efc588949431 100644
--- a/include/uapi/linux/if_link.h
+++ b/include/uapi/linux/if_link.h
@@ -533,6 +533,7 @@ enum {
        IFLA_VXLAN_LABEL,
        IFLA_VXLAN_GPE,
        IFLA_VXLAN_TTL_INHERIT,
+       IFLA_VXLAN_DF,
        __IFLA_VXLAN_MAX
 };
 #define IFLA_VXLAN_MAX (__IFLA_VXLAN_MAX - 1)
@@ -542,6 +543,14 @@ struct ifla_vxlan_port_range {
        __be16  high;
 };
 
+enum ifla_vxlan_df {
+       VXLAN_DF_UNSET = 0,
+       VXLAN_DF_SET,
+       VXLAN_DF_INHERIT,
+       __VXLAN_DF_END,
+       VXLAN_DF_MAX = __VXLAN_DF_END - 1,
+};
+
 /* GENEVE section */
 enum {
        IFLA_GENEVE_UNSPEC,
-- 
2.19.1

Reply via email to