[PATCH 2/7] v2: Turn "zext_thyself" from global into a context var

Shahab Vahedi Tue, 30 Apr 2024 08:10:22 -0700

From: Shahab Vahedi <sha...@synopsys.com>

also:


- Update some comments along the way.
- Refactor the gen_swap()'s "if/else" to present the logic better
- Remove "extern" from the proto-type
---
 arch/arc/net/bpf_jit.h       | 14 +++++-----
 arch/arc/net/bpf_jit_arcv2.c | 51 ++++++++++++++++++------------------
 arch/arc/net/bpf_jit_core.c  | 25 +++++++++---------
 3 files changed, 44 insertions(+), 46 deletions(-)

diff --git a/arch/arc/net/bpf_jit.h b/arch/arc/net/bpf_jit.h
index ecad47b8b796..9fc70d97415b 100644
--- a/arch/arc/net/bpf_jit.h
+++ b/arch/arc/net/bpf_jit.h
@@ -37,10 +37,6 @@
  */
 #define BUF(b, n) (((b) != NULL) ? ((b) + (n)) : (b))
 
-/************* Globals that have effects on code generation ***********/
-/* An indicator if zero-extend must be done for the 32-bit operations. */
-extern bool zext_thyself;
-
 /************** Functions that the back-end must provide **************/
 /* Extension for 32-bit operations. */
 extern inline u8 zext(u8 *buf, u8 rd);
@@ -156,10 +152,12 @@ extern u8 gen_jmp_64(u8 *buf, u8 rd, u8 rs, u8 cond, u32 
c_off, u32 t_off);
 extern u8 gen_func_call(u8 *buf, ARC_ADDR func_addr, bool external_func);
 extern u8 arc_to_bpf_return(u8 *buf);
 /*
- * Perform byte swaps on "rd" based on the "size". If "force" is
- * set to "true", do it unconditionally. Otherwise, consider the
- * desired "endian"ness and the host endianness.
+ * - Perform byte swaps on "rd" based on the "size".
+ * - If "force" is set, do it unconditionally. Otherwise, consider the
+ *   desired "endian"ness and the host endianness.
+ * - For data "size"s up to 32 bits, perform a zero-extension if asked
+ *   by the "do_zext" boolean.
  */
-extern u8 gen_swap(u8 *buf, u8 rd, u8 size, u8 endian, bool force);
+u8 gen_swap(u8 *buf, u8 rd, u8 size, u8 endian, bool force, bool do_zext);
 
 #endif /* _ARC_BPF_JIT_H */
diff --git a/arch/arc/net/bpf_jit_arcv2.c b/arch/arc/net/bpf_jit_arcv2.c
index b9e803f04a36..8b7ae2f11f38 100644
--- a/arch/arc/net/bpf_jit_arcv2.c
+++ b/arch/arc/net/bpf_jit_arcv2.c
@@ -1305,7 +1305,7 @@ static u8 arc_b(u8 *buf, s32 offset)
 
 inline u8 zext(u8 *buf, u8 rd)
 {
-       if (zext_thyself && rd != BPF_REG_FP)
+       if (rd != BPF_REG_FP)
                return arc_movi_r(buf, REG_HI(rd), 0);
        else
                return 0;
@@ -2198,7 +2198,7 @@ u8 arsh_r64_i32(u8 *buf, u8 rd, s32 imm)
        return len;
 }
 
-u8 gen_swap(u8 *buf, u8 rd, u8 size, u8 endian, bool force)
+u8 gen_swap(u8 *buf, u8 rd, u8 size, u8 endian, bool force, bool do_zext)
 {
        u8 len = 0;
 #ifdef __BIG_ENDIAN
@@ -2206,36 +2206,19 @@ u8 gen_swap(u8 *buf, u8 rd, u8 size, u8 endian, bool 
force)
 #else
        const u8 host_endian = BPF_FROM_LE;
 #endif
-       /*
-        * If the same endianness, there's not much to do other
-        * than zeroing out the upper bytes based on the "size".
-        */
-       if ((force == false) && (host_endian == endian)) {
-               switch (size) {
-               case 16:
-                       len += arc_and_i(BUF(buf, len), REG_LO(rd), 0xffff);
-                       fallthrough;
-               case 32:
-                       len += zext(BUF(buf, len), rd);
-                       fallthrough;
-               case 64:
-                       break;
-               default:
-                       /* The caller must have handled this. */
-               }
-       } else {
+       if (host_endian != endian || force) {
                switch (size) {
                case 16:
                        /*
                         * r = B4B3_B2B1 << 16 --> r = B2B1_0000
-                        * swape(r) is 0000_B1B2
+                        * then, swape(r) would become the desired 0000_B1B2
                         */
-                       len += arc_asli_r(BUF(buf, len),
-                                         REG_LO(rd), REG_LO(rd), 16);
+                       len = arc_asli_r(buf, REG_LO(rd), REG_LO(rd), 16);
                        fallthrough;
                case 32:
                        len += arc_swape_r(BUF(buf, len), REG_LO(rd));
-                       len += zext(BUF(buf, len), rd);
+                       if (do_zext)
+                               len += zext(BUF(buf, len), rd);
                        break;
                case 64:
                        /*
@@ -2245,7 +2228,7 @@ u8 gen_swap(u8 *buf, u8 rd, u8 size, u8 endian, bool 
force)
                         *   hi ^= lo;
                         * and then swap the bytes in "hi" and "lo".
                         */
-                       len += arc_xor_r(BUF(buf, len), REG_HI(rd), REG_LO(rd));
+                       len  = arc_xor_r(buf, REG_HI(rd), REG_LO(rd));
                        len += arc_xor_r(BUF(buf, len), REG_LO(rd), REG_HI(rd));
                        len += arc_xor_r(BUF(buf, len), REG_HI(rd), REG_LO(rd));
                        len += arc_swape_r(BUF(buf, len), REG_LO(rd));
@@ -2254,6 +2237,24 @@ u8 gen_swap(u8 *buf, u8 rd, u8 size, u8 endian, bool 
force)
                default:
                        /* The caller must have handled this. */
                }
+       } else {
+               /*
+                * If the same endianness, there's not much to do other
+                * than zeroing out the upper bytes based on the "size".
+                */
+               switch (size) {
+               case 16:
+                       len = arc_and_i(buf, REG_LO(rd), 0xffff);
+                       fallthrough;
+               case 32:
+                       if (do_zext)
+                               len += zext(BUF(buf, len), rd);
+                       break;
+               case 64:
+                       break;
+               default:
+                       /* The caller must have handled this. */
+               }
        }
 
        return len;
diff --git a/arch/arc/net/bpf_jit_core.c b/arch/arc/net/bpf_jit_core.c
index eea1a469a195..79ec0bbf1153 100644
--- a/arch/arc/net/bpf_jit_core.c
+++ b/arch/arc/net/bpf_jit_core.c
@@ -8,9 +8,6 @@
 #include <asm/bug.h>
 #include "bpf_jit.h"
 
-/* Sane initial values for the globals */
-bool zext_thyself = true;
-
 /*
  * Check for the return value. A pattern used oftenly in this file.
  * There must be a "ret" variable of type "int" in the scope.
@@ -86,6 +83,7 @@ struct arc_jit_data {
  * jit:                        The JIT buffer and its length.
  * bpf_header:         The JITed program header. "jit.buf" points inside it.
  * emit:               If set, opcodes are written to memory; else, a dry-run.
+ * do_zext:            If true, 32-bit sub-regs must be zero extended.
  * bpf2insn:           Maps BPF insn indices to their counterparts in jit.buf.
  * bpf2insn_valid:     Indicates if "bpf2ins" is populated with the mappings.
  * jit_data:           A piece of memory to transfer data to the next pass.
@@ -105,6 +103,7 @@ struct jit_context {
        struct jit_buffer               jit;
        struct bpf_binary_header        *bpf_header;
        bool                            emit;
+       bool                            do_zext;
        u32                             *bpf2insn;
        bool                            bpf2insn_valid;
        struct arc_jit_data             *jit_data;
@@ -185,7 +184,7 @@ static int jit_ctx_init(struct jit_context *ctx, struct 
bpf_prog *prog)
        ctx->success            = false;
 
        /* If the verifier doesn't zero-extend, then we have to do it. */
-       zext_thyself = !ctx->prog->aux->verifier_zext;
+       ctx->do_zext = !ctx->prog->aux->verifier_zext;
 
        return 0;
 }
@@ -250,8 +249,7 @@ static void jit_ctx_cleanup(struct jit_context *ctx)
        }
 
        ctx->emit = false;
-       /* Global booleans set to false. */
-       zext_thyself = false;
+       ctx->do_zext = false;
 }
 
 /*
@@ -415,7 +413,7 @@ static inline void set_need_for_extra_pass(struct 
jit_context *ctx)
  * the back-end for the swap.
  */
 static int handle_swap(u8 *buf, u8 rd, u8 size, u8 endian,
-                      bool force, u8 *len)
+                      bool force, bool do_zext, u8 *len)
 {
        /* Sanity check on the size. */
        switch (size) {
@@ -428,7 +426,7 @@ static int handle_swap(u8 *buf, u8 rd, u8 size, u8 endian,
                return -EINVAL;
        }
 
-       *len = gen_swap(buf, rd, size, endian, force);
+       *len = gen_swap(buf, rd, size, endian, force, do_zext);
 
        return 0;
 }
@@ -866,7 +864,7 @@ static int handle_insn(struct jit_context *ctx, u32 idx)
        case BPF_ALU64 | BPF_END | BPF_FROM_LE: {
                CHECK_RET(handle_swap(buf, dst, imm, BPF_SRC(code),
                                      BPF_CLASS(code) == BPF_ALU64,
-                                     &len));
+                                     ctx->do_zext, &len));
                break;
        }
        /* dst += src (64-bit) */
@@ -1049,11 +1047,12 @@ static int handle_insn(struct jit_context *ctx, u32 idx)
 
        if (BPF_CLASS(code) == BPF_ALU) {
                /*
-                * Even 64-bit swaps are of type BPF_ALU (and not BPF_ALU64).
-                * Therefore, the routine responsible for "swap" specifically
-                * takes care of calling "zext()" based on the input "size".
+                * Skip the "swap" instructions. Even 64-bit swaps are of type
+                * BPF_ALU (and not BPF_ALU64). Therefore, for the swaps, one
+                * has to look at the "size" of the operations rather than the
+                * ALU type. "gen_swap()" specifically takes care of that.
                 */
-               if (BPF_OP(code) != BPF_END)
+               if (BPF_OP(code) != BPF_END && ctx->do_zext)
                        len += zext(BUF(buf, len), dst);
        }
 
-- 
2.35.8


_______________________________________________
linux-snps-arc mailing list
linux-snps-arc@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-snps-arc

[PATCH 2/7] v2: Turn "zext_thyself" from global into a context var

Reply via email to