On 11/27/2018 11:25 PM, David Miller wrote:
> 
> In the linux/bpf.h UAPI header, we must absolutely avoid any
> non-fixed-sized types.
> 
> Otherwise we have serious problems on 32-bit.
> 
> Unfortunately I discovered today that we have take on two such cases,
> sk_msg_md and sk_reuseport_md, both of which start with two void
> pointers.
> 
> I hit this because test_verifier.c on my sparc64 box was built as
> a 32-bit binary and this causes a hundred or so tests to fail, and
> many if not all are because of the changing struct layout.
> 
> I could built 64-bit but this absolutely should work properly.
> 
> But for fully native 32-bit it is even worse, this will really
> need to be resolved because llvm/clang is always going to build
> the BPF programs with 64-bit void pointers.
> 
> I would strongly suggest we try and fix this now if we can.

Yeah fully agree. Thinking diff below should address it, do you
have a chance to give this a spin for sparc / 32 bit to check if
test_verifier still explodes?

Thanks a lot,
Daniel

 include/linux/filter.h         |  7 +++++++
 include/uapi/linux/bpf.h       | 17 ++++++++++++-----
 net/core/filter.c              | 16 ++++++++--------
 tools/include/uapi/linux/bpf.h | 17 ++++++++++++-----
 4 files changed, 39 insertions(+), 18 deletions(-)

diff --git a/include/linux/filter.h b/include/linux/filter.h
index 448dcc4..795ff0b 100644
--- a/include/linux/filter.h
+++ b/include/linux/filter.h
@@ -449,6 +449,13 @@ struct sock_reuseport;
        offsetof(TYPE, MEMBER) ... offsetofend(TYPE, MEMBER) - 1
 #define bpf_ctx_range_till(TYPE, MEMBER1, MEMBER2)                             
\
        offsetof(TYPE, MEMBER1) ... offsetofend(TYPE, MEMBER2) - 1
+#if BITS_PER_LONG == 64
+# define bpf_ctx_range_ptr(TYPE, MEMBER)                                       
\
+       offsetof(TYPE, MEMBER) ... offsetofend(TYPE, MEMBER) - 1
+#else
+# define bpf_ctx_range_ptr(TYPE, MEMBER)                                       
\
+       offsetof(TYPE, MEMBER) ... offsetof(TYPE, MEMBER) + 8 - 1
+#endif /* BITS_PER_LONG == 64 */

 #define bpf_target_off(TYPE, MEMBER, SIZE, PTR_SIZE)                           
\
        ({                                                                      
\
diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index 852dc17..426b5c8 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -2422,6 +2422,12 @@ enum bpf_lwt_encap_mode {
        BPF_LWT_ENCAP_SEG6_INLINE
 };

+#define __bpf_md_ptr(type, name)       \
+union {                                        \
+       type name;                      \
+       __u64 :64;                      \
+} __attribute__((aligned(8)))
+
 /* user accessible mirror of in-kernel sk_buff.
  * new fields can only be added to the end of this structure
  */
@@ -2456,7 +2462,7 @@ struct __sk_buff {
        /* ... here. */

        __u32 data_meta;
-       struct bpf_flow_keys *flow_keys;
+       __bpf_md_ptr(struct bpf_flow_keys *, flow_keys);
 };

 struct bpf_tunnel_key {
@@ -2572,8 +2578,8 @@ enum sk_action {
  * be added to the end of this structure
  */
 struct sk_msg_md {
-       void *data;
-       void *data_end;
+       __bpf_md_ptr(void *, data);
+       __bpf_md_ptr(void *, data_end);

        __u32 family;
        __u32 remote_ip4;       /* Stored in network byte order */
@@ -2589,8 +2595,9 @@ struct sk_reuseport_md {
         * Start of directly accessible data. It begins from
         * the tcp/udp header.
         */
-       void *data;
-       void *data_end;         /* End of directly accessible data */
+       __bpf_md_ptr(void *, data);
+       /* End of directly accessible data */
+       __bpf_md_ptr(void *, data_end);
        /*
         * Total length of packet (starting from the tcp/udp header).
         * Note that the directly accessible bytes (data_end - data)
diff --git a/net/core/filter.c b/net/core/filter.c
index 9a1327e..6ee605d 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -5435,8 +5435,8 @@ static bool bpf_skb_is_valid_access(int off, int size, 
enum bpf_access_type type
                if (size != size_default)
                        return false;
                break;
-       case bpf_ctx_range(struct __sk_buff, flow_keys):
-               if (size != sizeof(struct bpf_flow_keys *))
+       case bpf_ctx_range_ptr(struct __sk_buff, flow_keys):
+               if (size != sizeof(__u64))
                        return false;
                break;
        default:
@@ -5464,7 +5464,7 @@ static bool sk_filter_is_valid_access(int off, int size,
        case bpf_ctx_range(struct __sk_buff, data):
        case bpf_ctx_range(struct __sk_buff, data_meta):
        case bpf_ctx_range(struct __sk_buff, data_end):
-       case bpf_ctx_range(struct __sk_buff, flow_keys):
+       case bpf_ctx_range_ptr(struct __sk_buff, flow_keys):
        case bpf_ctx_range_till(struct __sk_buff, family, local_port):
                return false;
        }
@@ -5489,7 +5489,7 @@ static bool cg_skb_is_valid_access(int off, int size,
        switch (off) {
        case bpf_ctx_range(struct __sk_buff, tc_classid):
        case bpf_ctx_range(struct __sk_buff, data_meta):
-       case bpf_ctx_range(struct __sk_buff, flow_keys):
+       case bpf_ctx_range_ptr(struct __sk_buff, flow_keys):
                return false;
        case bpf_ctx_range(struct __sk_buff, data):
        case bpf_ctx_range(struct __sk_buff, data_end):
@@ -5530,7 +5530,7 @@ static bool lwt_is_valid_access(int off, int size,
        case bpf_ctx_range(struct __sk_buff, tc_classid):
        case bpf_ctx_range_till(struct __sk_buff, family, local_port):
        case bpf_ctx_range(struct __sk_buff, data_meta):
-       case bpf_ctx_range(struct __sk_buff, flow_keys):
+       case bpf_ctx_range_ptr(struct __sk_buff, flow_keys):
                return false;
        }

@@ -5756,7 +5756,7 @@ static bool tc_cls_act_is_valid_access(int off, int size,
        case bpf_ctx_range(struct __sk_buff, data_end):
                info->reg_type = PTR_TO_PACKET_END;
                break;
-       case bpf_ctx_range(struct __sk_buff, flow_keys):
+       case bpf_ctx_range_ptr(struct __sk_buff, flow_keys):
        case bpf_ctx_range_till(struct __sk_buff, family, local_port):
                return false;
        }
@@ -5958,7 +5958,7 @@ static bool sk_skb_is_valid_access(int off, int size,
        switch (off) {
        case bpf_ctx_range(struct __sk_buff, tc_classid):
        case bpf_ctx_range(struct __sk_buff, data_meta):
-       case bpf_ctx_range(struct __sk_buff, flow_keys):
+       case bpf_ctx_range_ptr(struct __sk_buff, flow_keys):
                return false;
        }

@@ -6039,7 +6039,7 @@ static bool flow_dissector_is_valid_access(int off, int 
size,
        case bpf_ctx_range(struct __sk_buff, data_end):
                info->reg_type = PTR_TO_PACKET_END;
                break;
-       case bpf_ctx_range(struct __sk_buff, flow_keys):
+       case bpf_ctx_range_ptr(struct __sk_buff, flow_keys):
                info->reg_type = PTR_TO_FLOW_KEYS;
                break;
        case bpf_ctx_range(struct __sk_buff, tc_classid):
diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
index 852dc17..426b5c8 100644
--- a/tools/include/uapi/linux/bpf.h
+++ b/tools/include/uapi/linux/bpf.h
@@ -2422,6 +2422,12 @@ enum bpf_lwt_encap_mode {
        BPF_LWT_ENCAP_SEG6_INLINE
 };

+#define __bpf_md_ptr(type, name)       \
+union {                                        \
+       type name;                      \
+       __u64 :64;                      \
+} __attribute__((aligned(8)))
+
 /* user accessible mirror of in-kernel sk_buff.
  * new fields can only be added to the end of this structure
  */
@@ -2456,7 +2462,7 @@ struct __sk_buff {
        /* ... here. */

        __u32 data_meta;
-       struct bpf_flow_keys *flow_keys;
+       __bpf_md_ptr(struct bpf_flow_keys *, flow_keys);
 };

 struct bpf_tunnel_key {
@@ -2572,8 +2578,8 @@ enum sk_action {
  * be added to the end of this structure
  */
 struct sk_msg_md {
-       void *data;
-       void *data_end;
+       __bpf_md_ptr(void *, data);
+       __bpf_md_ptr(void *, data_end);

        __u32 family;
        __u32 remote_ip4;       /* Stored in network byte order */
@@ -2589,8 +2595,9 @@ struct sk_reuseport_md {
         * Start of directly accessible data. It begins from
         * the tcp/udp header.
         */
-       void *data;
-       void *data_end;         /* End of directly accessible data */
+       __bpf_md_ptr(void *, data);
+       /* End of directly accessible data */
+       __bpf_md_ptr(void *, data_end);
        /*
         * Total length of packet (starting from the tcp/udp header).
         * Note that the directly accessible bytes (data_end - data)
-- 
2.9.5

Reply via email to