From: Shrijeet Mukherjee <shrij...@gmail.com>

Push: takes an array of labels and its size, and encapsulates the packet
      with those labels
Pop:  takes a number "n" and pops the top "n" labels

Signed-off-by: Shrijeet Mukherjee <s...@cumulusnetworks.com>

PS: I will be adding tests as well; I am refactoring my test into something
smaller.

---
 include/linux/bpf.h      |   2 +
 include/net/mpls.h       |  38 +++++++++++++-
 include/uapi/linux/bpf.h |  11 +++-
 net/core/filter.c        | 127 ++++++++++++++++++++++++++++++++++++++++++++++-
 4 files changed, 175 insertions(+), 3 deletions(-)

diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index 819229c80eca..b38bfabc1fb5 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -672,6 +672,8 @@ extern const struct bpf_func_proto 
bpf_get_current_uid_gid_proto;
 extern const struct bpf_func_proto bpf_get_current_comm_proto;
 extern const struct bpf_func_proto bpf_skb_vlan_push_proto;
 extern const struct bpf_func_proto bpf_skb_vlan_pop_proto;
+extern const struct bpf_func_proto bpf_skb_mpls_push_proto;
+extern const struct bpf_func_proto bpf_skb_mpls_pop_proto;
 extern const struct bpf_func_proto bpf_get_stackid_proto;
 extern const struct bpf_func_proto bpf_sock_map_update_proto;
 
diff --git a/include/net/mpls.h b/include/net/mpls.h
index 2583dbc689b8..3a5b8c00823d 100644
--- a/include/net/mpls.h
+++ b/include/net/mpls.h
@@ -24,14 +24,50 @@ struct mpls_shim_hdr {
        __be32 label_stack_entry;
 };
 
+/* One MPLS label stack entry unpacked into host-order fields
+ * (see mpls_entry_decode()).
+ */
+struct mpls_entry_decoded {
+       u32 label;      /* bits selected by MPLS_LS_LABEL_MASK */
+       u8 ttl;         /* bits selected by MPLS_LS_TTL_MASK */
+       u8 tc;          /* bits selected by MPLS_LS_TC_MASK */
+       u8 bos;         /* bit selected by MPLS_LS_S_MASK (bottom of stack) */
+};
+
 static inline bool eth_p_mpls(__be16 eth_type)
 {
        return eth_type == htons(ETH_P_MPLS_UC) ||
                eth_type == htons(ETH_P_MPLS_MC);
 }
 
-static inline struct mpls_shim_hdr *mpls_hdr(const struct sk_buff *skb)
+static inline struct mpls_shim_hdr *skb_mpls_hdr(const struct sk_buff *skb)
 {
        return (struct mpls_shim_hdr *)skb_network_header(skb);
 }
+
+/* mpls_entry_encode - build one network-order MPLS label stack entry.
+ *
+ * @label: label value, host order
+ * @ttl:   time-to-live
+ * @tc:    traffic class
+ * @bos:   true to set the bottom-of-stack bit
+ *
+ * Every field is masked to its own bit range after shifting, so an
+ * out-of-range argument is truncated instead of leaking into a
+ * neighbouring field of the entry.
+ */
+static inline struct mpls_shim_hdr
+mpls_entry_encode(u32 label, unsigned int ttl, unsigned int tc, bool bos)
+{
+       struct mpls_shim_hdr result;
+       u32 entry;
+
+       entry = (label << MPLS_LS_LABEL_SHIFT) & MPLS_LS_LABEL_MASK;
+       entry |= (tc << MPLS_LS_TC_SHIFT) & MPLS_LS_TC_MASK;
+       entry |= (ttl << MPLS_LS_TTL_SHIFT) & MPLS_LS_TTL_MASK;
+       if (bos)
+               entry |= 1U << MPLS_LS_S_SHIFT;
+
+       result.label_stack_entry = cpu_to_be32(entry);
+
+       return result;
+}
+
+/* mpls_entry_decode - split a network-order label stack entry into its
+ * label, TTL, traffic class and bottom-of-stack fields (host order).
+ */
+static inline
+struct mpls_entry_decoded mpls_entry_decode(struct mpls_shim_hdr *hdr)
+{
+       unsigned int lse = be32_to_cpu(hdr->label_stack_entry);
+       struct mpls_entry_decoded out;
+
+       out.bos = (lse & MPLS_LS_S_MASK) >> MPLS_LS_S_SHIFT;
+       out.tc = (lse & MPLS_LS_TC_MASK) >> MPLS_LS_TC_SHIFT;
+       out.ttl = (lse & MPLS_LS_TTL_MASK) >> MPLS_LS_TTL_SHIFT;
+       out.label = (lse & MPLS_LS_LABEL_MASK) >> MPLS_LS_LABEL_SHIFT;
+
+       return out;
+}
+
 #endif
diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index 18b7c510c511..2278548e1f8f 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -439,6 +439,12 @@ union bpf_attr {
  * int bpf_skb_vlan_pop(skb)
  *     Return: 0 on success or negative error
  *
+ * int bpf_skb_mpls_push(skb, lbls, num_lbls)
+ *     Return: 0 on success or negative error
+ *
+ * int bpf_skb_mpls_pop(skb, num_lbls)
+ *     Return: number of popped labels; 0 means nothing was popped (no-op)
+ *     and the packet is delivered to the current dst
+ *
  * int bpf_skb_get_tunnel_key(skb, key, size, flags)
  * int bpf_skb_set_tunnel_key(skb, key, size, flags)
  *     retrieve or populate tunnel metadata
@@ -794,7 +800,10 @@ union bpf_attr {
        FN(msg_redirect_map),           \
        FN(msg_apply_bytes),            \
        FN(msg_cork_bytes),             \
-       FN(msg_pull_data),
+       FN(msg_pull_data),              \
+       FN(skb_mpls_push),              \
+       FN(skb_mpls_pop),
+
 
 /* integer value in 'imm' field of BPF_CALL instruction selects which helper
  * function eBPF program intends to call
diff --git a/net/core/filter.c b/net/core/filter.c
index 00f62fafc788..c96ae8ef423d 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -2522,7 +2522,7 @@ static int bpf_skb_adjust_net(struct sk_buff *skb, s32 
len_diff)
 }
 
 BPF_CALL_4(bpf_skb_adjust_room, struct sk_buff *, skb, s32, len_diff,
-          u32, mode, u64, flags)
+                               u32, mode, u64, flags)
 {
        if (unlikely(flags))
                return -EINVAL;
@@ -2542,6 +2542,125 @@ static const struct bpf_func_proto 
bpf_skb_adjust_room_proto = {
        .arg4_type      = ARG_ANYTHING,
 };
 
+/* bpf_skb_mpls_net_grow - open room for an MPLS label stack between the MAC
+ * header and the current network header.
+ *
+ * @skb:      packet to grow
+ * @len_diff: number of bytes to insert
+ *
+ * NOTE(review): assumes skb->data sits on the MAC header when this runs, as
+ * is the case for tc programs - confirm for any other caller.
+ *
+ * On success the MAC header has been moved to the new start of the packet,
+ * the network header points at the inserted room, the inner protocol and
+ * inner network header describe the encapsulated payload, and both
+ * skb->protocol and the Ethernet type are ETH_P_MPLS_UC.
+ *
+ * Return: 0 on success, or the negative error returned by skb_cow() or
+ * bpf_skb_generic_push().
+ */
+static int bpf_skb_mpls_net_grow(struct sk_buff *skb, int len_diff)
+{
+       /* Length of the MAC header that has to stay in front of the labels. */
+       u32 off = skb_mac_header_len(skb);
+       int ret;
+
+       ret = skb_cow(skb, len_diff);
+       if (unlikely(ret < 0))
+               return ret;
+
+       /* Record the payload being encapsulated.  Its network header starts
+        * 'off' bytes past skb->data, not at skb->data itself, so a plain
+        * skb_reset_inner_network_header() would record the MAC header.
+        */
+       skb_set_inner_protocol(skb, skb->protocol);
+       skb_set_inner_network_header(skb, off);
+
+       ret = bpf_skb_generic_push(skb, off, len_diff);
+       if (unlikely(ret < 0))
+               return ret;
+
+       skb_reset_mac_header(skb);
+       /* Use the real MAC header length rather than assuming ETH_HLEN. */
+       skb_set_network_header(skb, off);
+       skb->protocol = eth_hdr(skb)->h_proto = htons(ETH_P_MPLS_UC);
+
+       if (skb_is_gso(skb)) {
+               /* Due to header grow, MSS needs to be downgraded. */
+               skb_shinfo(skb)->gso_size -= len_diff;
+               /* Header must be checked, and gso_segs recomputed. */
+               skb_shinfo(skb)->gso_type |= SKB_GSO_DODGY;
+               skb_shinfo(skb)->gso_segs = 0;
+       }
+
+       bpf_compute_data_pointers(skb);
+       return 0;
+}
+
+/* Rewrite the Ethernet type stored in the two bytes in front of the network
+ * header, keeping a CHECKSUM_COMPLETE skb->csum in sync with the new bytes.
+ */
+static void bpf_skb_mpls_set_eth_type(struct sk_buff *skb, __be16 proto)
+{
+       __be16 *type = (__be16 *)(skb_network_header(skb) - sizeof(*type));
+
+       if (skb->ip_summed == CHECKSUM_COMPLETE) {
+               __be16 diff[] = { ~(*type), proto };
+
+               skb->csum = csum_partial((char *)diff, sizeof(diff),
+                                        skb->csum);
+       }
+       *type = proto;
+}
+
+/* bpf_skb_mpls_pop - remove up to @num_lbls entries from the top of the MPLS
+ * label stack found at the network header.
+ *
+ * Popping stops early once the bottom-of-stack entry has been removed.  In
+ * that case the payload is no longer MPLS, so the Ethernet type and
+ * skb->protocol are switched to IPv4 or IPv6 according to the version
+ * nibble of the first payload byte; any other payload is refused before
+ * the packet is touched.  While labels remain, the existing MPLS Ethernet
+ * type is left as it is.
+ *
+ * NOTE(review): assumes skb->data sits on the MAC header, as for tc
+ * programs - confirm for any other program type.
+ *
+ * Return: number of labels removed, 0 when nothing was done (@num_lbls is 0
+ * or the packet is not MPLS), or a negative error.
+ */
+BPF_CALL_2(bpf_skb_mpls_pop, struct sk_buff*, skb, u32, num_lbls)
+{
+       const u32 lse_len = sizeof(struct mpls_shim_hdr);
+       u32 mac_len = skb_mac_header_len(skb);
+       struct mpls_entry_decoded dec;
+       __be16 proto = skb->protocol;
+       u32 popped = 0;
+       u32 pop_len;
+       int ret;
+
+       if (num_lbls == 0 || !eth_p_mpls(skb->protocol))
+               return 0;
+       if (unlikely(mac_len < ETH_HLEN))
+               return -EINVAL;
+
+       /* Count the entries to drop.  Each entry is made linear before it is
+        * read; the header pointer is re-read every round because
+        * pskb_may_pull() may reallocate the packet head.
+        */
+       do {
+               if (!pskb_may_pull(skb, mac_len + (popped + 1) * lse_len))
+                       return -EINVAL;
+               dec = mpls_entry_decode(skb_mpls_hdr(skb) + popped);
+               popped++;
+       } while (!dec.bos && popped < num_lbls);
+       pop_len = popped * lse_len;
+
+       if (dec.bos) {
+               /* Whole stack goes away: work out what is underneath. */
+               if (!pskb_may_pull(skb, mac_len + pop_len + 1))
+                       return -EINVAL;
+               switch (skb_network_header(skb)[pop_len] >> 4) {
+               case 4:
+                       proto = htons(ETH_P_IP);
+                       break;
+               case 6:
+                       proto = htons(ETH_P_IPV6);
+                       break;
+               default:
+                       return -EPROTONOSUPPORT;
+               }
+       }
+
+       ret = skb_ensure_writable(skb, mac_len + pop_len);
+       if (unlikely(ret < 0))
+               return ret;
+
+       bpf_push_mac_rcsum(skb);
+       /* Drop the labels from the checksum, then slide the MAC header
+        * forward over them and trim the now unused front of the packet.
+        */
+       skb_postpull_rcsum(skb, skb_network_header(skb), pop_len);
+       memmove(skb_mac_header(skb) + pop_len, skb_mac_header(skb), mac_len);
+       __skb_pull(skb, pop_len);
+       skb_reset_mac_header(skb);
+       skb_set_network_header(skb, mac_len);
+
+       if (dec.bos)
+               bpf_skb_mpls_set_eth_type(skb, proto);
+       skb->protocol = proto;
+
+       bpf_pull_mac_rcsum(skb);
+       bpf_compute_data_pointers(skb);
+
+       return popped;
+}
+
+/* Helper prototype for bpf_skb_mpls_pop(): takes the program context and an
+ * unconstrained value, returns an integer.
+ */
+const struct bpf_func_proto bpf_skb_mpls_pop_proto = {
+       .func           = bpf_skb_mpls_pop,
+       .gpl_only       = false,
+       .ret_type       = RET_INTEGER,
+       .arg1_type      = ARG_PTR_TO_CTX,
+       .arg2_type      = ARG_ANYTHING,
+};
+EXPORT_SYMBOL_GPL(bpf_skb_mpls_pop_proto);
+
+/* bpf_skb_mpls_push - encapsulate the packet in the MPLS labels of @lbls.
+ *
+ * @skb:      packet to encapsulate
+ * @lbls:     label values, lbls[0] becoming the outermost entry.
+ *            NOTE(review): despite the __be32 type the values are shifted
+ *            into place as host-order numbers - confirm the intended
+ *            byte order.
+ * @num_lbls: the helper prototype declares this argument ARG_CONST_SIZE, so
+ *            the only guarantee is that this many BYTES of @lbls are
+ *            readable.  It is therefore taken as the byte size of @lbls and
+ *            must be a non-zero multiple of sizeof(__be32); treating it as
+ *            a label count would read past the checked memory.
+ *
+ * The TTL of the new entries is copied from the IPv4 TTL or IPv6 hop limit
+ * of the payload and defaults to 255 otherwise.  The bottom-of-stack bit is
+ * set on the last entry unless the packet already carries an MPLS stack.
+ *
+ * Return: 0 on success or a negative error.
+ */
+BPF_CALL_3(bpf_skb_mpls_push, struct sk_buff*, skb,
+          __be32*, lbls, u32, num_lbls)
+{
+       u32 count = num_lbls / sizeof(*lbls);
+       __be16 payload_proto = skb->protocol;
+       struct dst_entry *dst = skb_dst(skb);
+       struct mpls_shim_hdr *hdr;
+       unsigned int ttl = 255;
+       bool bos;
+       int ret;
+       u32 i;
+
+       if (unlikely(!count || num_lbls % sizeof(*lbls)))
+               return -EINVAL;
+
+       /* Fetch the TTL before growing the packet: afterwards the network
+        * header points at the label room, not at the IP header.
+        * XXX this may need finesse to integrate with
+        * global TTL values for MPLS
+        */
+       if (payload_proto == htons(ETH_P_IP)) {
+               if (!pskb_may_pull(skb, skb_network_offset(skb) +
+                                       sizeof(struct iphdr)))
+                       return -EINVAL;
+               ttl = ip_hdr(skb)->ttl;
+       } else if (payload_proto == htons(ETH_P_IPV6)) {
+               if (!pskb_may_pull(skb, skb_network_offset(skb) +
+                                       sizeof(struct ipv6hdr)))
+                       return -EINVAL;
+               ttl = ipv6_hdr(skb)->hop_limit;
+       }
+
+       /* Only a payload that is not MPLS yet needs a bottom-of-stack entry. */
+       bos = !eth_p_mpls(payload_proto);
+
+       /* Ensure there is enough space for the headers in the skb */
+       ret = bpf_skb_mpls_net_grow(skb, num_lbls);
+       if (ret < 0) {
+               bpf_compute_data_pointers(skb);
+               return ret;
+       }
+
+       /* A packet may have no route attached (skb_dst() returns NULL); keep
+        * its current device in that case instead of dereferencing NULL.
+        */
+       if (dst)
+               skb->dev = dst->dev;
+
+       /* Push the new labels */
+       hdr = skb_mpls_hdr(skb);
+       for (i = 0; i < count; i++)
+               hdr[i] = mpls_entry_encode(lbls[i], ttl, 0,
+                                          bos && i == count - 1);
+
+       bpf_compute_data_pointers(skb);
+       return 0;
+}
+
+/* Helper prototype for bpf_skb_mpls_push(): program context, a memory
+ * pointer and a size; returns an integer.
+ *
+ * NOTE(review): ARG_CONST_SIZE presumably makes the verifier check arg3 as
+ * a byte count for the arg2 buffer, not as a number of labels - the helper
+ * must not read more than arg3 bytes from the buffer.  Confirm against the
+ * verifier.
+ */
+const struct bpf_func_proto bpf_skb_mpls_push_proto = {
+       .func           = bpf_skb_mpls_push,
+       .gpl_only       = false,
+       .ret_type       = RET_INTEGER,
+       .arg1_type      = ARG_PTR_TO_CTX,
+       .arg2_type      = ARG_PTR_TO_MEM,
+       .arg3_type      = ARG_CONST_SIZE,
+};
+EXPORT_SYMBOL_GPL(bpf_skb_mpls_push_proto);
+
 static u32 __bpf_skb_min_len(const struct sk_buff *skb)
 {
        u32 min_len = skb_network_offset(skb);
@@ -3019,6 +3138,8 @@ bool bpf_helper_changes_pkt_data(void *func)
 {
        if (func == bpf_skb_vlan_push ||
            func == bpf_skb_vlan_pop ||
+           func == bpf_skb_mpls_push ||
+           func == bpf_skb_mpls_pop ||
            func == bpf_skb_store_bytes ||
            func == bpf_skb_change_proto ||
            func == bpf_skb_change_head ||
@@ -3682,6 +3803,10 @@ tc_cls_act_func_proto(enum bpf_func_id func_id)
                return &bpf_skb_vlan_push_proto;
        case BPF_FUNC_skb_vlan_pop:
                return &bpf_skb_vlan_pop_proto;
+       case BPF_FUNC_skb_mpls_push:
+               return &bpf_skb_mpls_push_proto;
+       case BPF_FUNC_skb_mpls_pop:
+               return &bpf_skb_mpls_pop_proto;
        case BPF_FUNC_skb_change_proto:
                return &bpf_skb_change_proto_proto;
        case BPF_FUNC_skb_change_type:
-- 
2.16.2.windows.1

Reply via email to