From: Shrijeet Mukherjee <shrij...@gmail.com> Push: takes an array/size of labels and encaps Pop : Takes a number and pops the top "n" labels
Signed-off-by: Shrijeet Mukherjee <s...@cumulusnetworks.com> PS: Will be adding tests as well, refactoring my test into something smaller. --- include/linux/bpf.h | 2 + include/net/mpls.h | 38 +++++++++++++- include/uapi/linux/bpf.h | 11 +++- net/core/filter.c | 127 ++++++++++++++++++++++++++++++++++++++++++++++- 4 files changed, 175 insertions(+), 3 deletions(-) diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 819229c80eca..b38bfabc1fb5 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -672,6 +672,8 @@ extern const struct bpf_func_proto bpf_get_current_uid_gid_proto; extern const struct bpf_func_proto bpf_get_current_comm_proto; extern const struct bpf_func_proto bpf_skb_vlan_push_proto; extern const struct bpf_func_proto bpf_skb_vlan_pop_proto; +extern const struct bpf_func_proto bpf_skb_mpls_push_proto; +extern const struct bpf_func_proto bpf_skb_mpls_pop_proto; extern const struct bpf_func_proto bpf_get_stackid_proto; extern const struct bpf_func_proto bpf_sock_map_update_proto; diff --git a/include/net/mpls.h b/include/net/mpls.h index 2583dbc689b8..3a5b8c00823d 100644 --- a/include/net/mpls.h +++ b/include/net/mpls.h @@ -24,14 +24,50 @@ struct mpls_shim_hdr { __be32 label_stack_entry; }; +struct mpls_entry_decoded { + u32 label; + u8 ttl; + u8 tc; + u8 bos; +}; + static inline bool eth_p_mpls(__be16 eth_type) { return eth_type == htons(ETH_P_MPLS_UC) || eth_type == htons(ETH_P_MPLS_MC); } -static inline struct mpls_shim_hdr *mpls_hdr(const struct sk_buff *skb) +static inline struct mpls_shim_hdr *skb_mpls_hdr(const struct sk_buff *skb) { return (struct mpls_shim_hdr *)skb_network_header(skb); } + +static inline struct mpls_shim_hdr +mpls_entry_encode(u32 label, unsigned int ttl, unsigned int tc, bool bos) +{ + struct mpls_shim_hdr result; + + result.label_stack_entry = + cpu_to_be32((label << MPLS_LS_LABEL_SHIFT) + | (tc << MPLS_LS_TC_SHIFT) + | (bos ? (1 << MPLS_LS_S_SHIFT) : 0) + | (ttl << MPLS_LS_TTL_SHIFT)); + + return result; +} + +static inline +struct mpls_entry_decoded mpls_entry_decode(struct mpls_shim_hdr *hdr) +{ + struct mpls_entry_decoded result; + unsigned int entry = be32_to_cpu(hdr->label_stack_entry); + + result.label = (entry & MPLS_LS_LABEL_MASK) >> MPLS_LS_LABEL_SHIFT; + result.ttl = (entry & MPLS_LS_TTL_MASK) >> MPLS_LS_TTL_SHIFT; + result.tc = (entry & MPLS_LS_TC_MASK) >> MPLS_LS_TC_SHIFT; + result.bos = (entry & MPLS_LS_S_MASK) >> MPLS_LS_S_SHIFT; + + return result; +} + #endif diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 18b7c510c511..2278548e1f8f 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -439,6 +439,12 @@ union bpf_attr { * int bpf_skb_vlan_pop(skb) * Return: 0 on success or negative error * + * int bpf_skb_mpls_push(skb, num_lbls, lbls[]) + * Return 0 on success or negative error + * + * int bpf_skb_mpls_pop(skb, num_lbls) + * Return number of popped labels, 0 is no-op, deliver packet to current dst + * * int bpf_skb_get_tunnel_key(skb, key, size, flags) * int bpf_skb_set_tunnel_key(skb, key, size, flags) * retrieve or populate tunnel metadata @@ -794,7 +800,10 @@ union bpf_attr { FN(msg_redirect_map), \ FN(msg_apply_bytes), \ FN(msg_cork_bytes), \ - FN(msg_pull_data), + FN(msg_pull_data), \ + FN(skb_mpls_push), \ + FN(skb_mpls_pop), + /* integer value in 'imm' field of BPF_CALL instruction selects which helper * function eBPF program intends to call diff --git a/net/core/filter.c b/net/core/filter.c index 00f62fafc788..c96ae8ef423d 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -2522,7 +2522,7 @@ static int bpf_skb_adjust_net(struct sk_buff *skb, s32 len_diff) } BPF_CALL_4(bpf_skb_adjust_room, struct sk_buff *, skb, s32, len_diff, - u32, mode, u64, flags) + u32, mode, u64, flags) { if (unlikely(flags)) return -EINVAL; @@ -2542,6 +2542,125 @@ static const struct bpf_func_proto bpf_skb_adjust_room_proto = { .arg4_type = ARG_ANYTHING, }; +static int bpf_skb_mpls_net_grow(struct sk_buff *skb, int len_diff) +{ + u32 off = skb_mac_header_len(skb); /*LL_RESERVED_SPACE ?? */ + int ret; + + ret = skb_cow(skb, len_diff); + if (unlikely(ret < 0)) + return ret; + + skb_set_inner_protocol(skb, skb->protocol); + skb_reset_inner_network_header(skb); + + ret = bpf_skb_generic_push(skb, off, len_diff); + if (unlikely(ret < 0)) + return ret; + + skb_reset_mac_header(skb); + skb_set_network_header(skb, ETH_HLEN); + skb->protocol = eth_hdr(skb)->h_proto = htons(ETH_P_MPLS_UC); + + if (skb_is_gso(skb)) { +/* Due to header grow, MSS needs to be downgraded. */ + skb_shinfo(skb)->gso_size -= len_diff; +/* Header must be checked, and gso_segs recomputed. */ + skb_shinfo(skb)->gso_type |= SKB_GSO_DODGY; + skb_shinfo(skb)->gso_segs = 0; + } + + bpf_compute_data_pointers(skb); + return 0; +} + +BPF_CALL_2(bpf_skb_mpls_pop, struct sk_buff*, skb, u32, num_lbls) +{ + u32 i = 0; + struct mpls_shim_hdr *hdr; + unsigned char *cursor; + struct mpls_entry_decoded dec; + + if (num_lbls == 0) + return 0; + + cursor = skb_network_header(skb); + do { + hdr = (struct mpls_shim_hdr *)cursor; + dec = mpls_entry_decode(hdr); + i++; cursor = cursor + sizeof(struct mpls_shim_hdr); + } while (dec.bos != 1 && i < num_lbls); + + bpf_push_mac_rcsum(skb); + skb_pull(skb, i * sizeof(struct mpls_shim_hdr)); + skb_reset_network_header(skb); + + skb->protocol = eth_hdr(skb)->h_proto = htons(ETH_P_MPLS_UC); + bpf_pull_mac_rcsum(skb); + bpf_compute_data_pointers(skb); + + return i; +} + +const struct bpf_func_proto bpf_skb_mpls_pop_proto = { + .func = bpf_skb_mpls_pop, + .gpl_only = false, + .ret_type = RET_INTEGER, + .arg1_type = ARG_PTR_TO_CTX, + .arg2_type = ARG_ANYTHING, +}; +EXPORT_SYMBOL_GPL(bpf_skb_mpls_pop_proto); + +BPF_CALL_3(bpf_skb_mpls_push, struct sk_buff*, skb, + __be32*, lbls, u32, num_lbls) +{ + int ret, i; + unsigned int new_header_size = num_lbls * sizeof(__be32); + unsigned int ttl = 255; + struct dst_entry *dst = skb_dst(skb); + struct net_device *out_dev = dst->dev; + struct mpls_shim_hdr *hdr; + bool bos; + + /* Ensure there is enough space for the headers in the skb */ + ret = bpf_skb_mpls_net_grow(skb, new_header_size); + if (ret < 0) { + trace_printk("COW was killed\n"); + bpf_compute_data_pointers(skb); + return -ENOMEM; + } + + skb->dev = out_dev; +/* XXX this may need finesse to integrate with + * global TTL values for MPLS + */ + if (dst->ops->family == AF_INET) + ttl = ip_hdr(skb)->ttl; + else if (dst->ops->family == AF_INET6) + ttl = ipv6_hdr(skb)->hop_limit; + + /* Push the new labels */ + hdr = skb_mpls_hdr(skb); + bos = true; + for (i = num_lbls - 1; i >= 0; i--) { + hdr[i] = mpls_entry_encode(lbls[i], ttl, 0, bos); + bos = false; + } + + bpf_compute_data_pointers(skb); + return 0; +} + +const struct bpf_func_proto bpf_skb_mpls_push_proto = { + .func = bpf_skb_mpls_push, + .gpl_only = false, + .ret_type = RET_INTEGER, + .arg1_type = ARG_PTR_TO_CTX, + .arg2_type = ARG_PTR_TO_MEM, + .arg3_type = ARG_CONST_SIZE, +}; +EXPORT_SYMBOL_GPL(bpf_skb_mpls_push_proto); + static u32 __bpf_skb_min_len(const struct sk_buff *skb) { u32 min_len = skb_network_offset(skb); @@ -3019,6 +3138,8 @@ bool bpf_helper_changes_pkt_data(void *func) { if (func == bpf_skb_vlan_push || func == bpf_skb_vlan_pop || + func == bpf_skb_mpls_push || + func == bpf_skb_mpls_pop || func == bpf_skb_store_bytes || func == bpf_skb_change_proto || func == bpf_skb_change_head || @@ -3682,6 +3803,10 @@ tc_cls_act_func_proto(enum bpf_func_id func_id) return &bpf_skb_vlan_push_proto; case BPF_FUNC_skb_vlan_pop: return &bpf_skb_vlan_pop_proto; + case BPF_FUNC_skb_mpls_push: + return &bpf_skb_mpls_push_proto; + case BPF_FUNC_skb_mpls_pop: + return &bpf_skb_mpls_pop_proto; case BPF_FUNC_skb_change_proto: return &bpf_skb_change_proto_proto; case BPF_FUNC_skb_change_type: -- 2.16.2.windows.1