https://gcc.gnu.org/g:1687d671459b63b7e19fe33d275ebbcddd43381e

commit 1687d671459b63b7e19fe33d275ebbcddd43381e
Author: Christoph Müllner <christoph.muell...@vrull.eu>
Date:   Wed May 15 12:18:20 2024 -0600

    [v2,1/2] RISC-V: Add cmpmemsi expansion
    
    GCC has a generic cmpmemsi expansion via the by-pieces framework,
    which shows some room for target-specific optimizations.
    E.g. for comparing two aligned memory blocks of 15 bytes
    we get the following sequence:
    
    my_mem_cmp_aligned_15:
            li      a4,0
            j       .L2
    .L8:
            bgeu    a4,a7,.L7
    .L2:
            add     a2,a0,a4
            add     a3,a1,a4
            lbu     a5,0(a2)
            lbu     a6,0(a3)
            addi    a4,a4,1
            li      a7,15    // missed hoisting
            subw    a5,a5,a6
            andi    a5,a5,0xff // useless
            beq     a5,zero,.L8
            lbu     a0,0(a2) // loading again!
            lbu     a5,0(a3) // loading again!
            subw    a0,a0,a5
            ret
    .L7:
            li      a0,0
            ret
    
    Diff first byte: 15 insns
    Diff second byte: 25 insns
    No diff: 25 insns
    
    Possible improvements:
    * unroll the loop and use load-with-displacement to avoid offset increments
    * load and compare multiple (aligned) bytes at once
    * Use the bitmanip/strcmp result calculation (reverse words and
      synthesize (a2 >= a3) ? 1 : -1 in a branchless sequence)
    
    When applying these improvements we get the following sequence:
    
    my_mem_cmp_aligned_15:
            ld      a5,0(a0)
            ld      a4,0(a1)
            bne     a5,a4,.L2
            ld      a5,8(a0)
            ld      a4,8(a1)
            slli    a5,a5,8
            slli    a4,a4,8
            bne     a5,a4,.L2
            li      a0,0
    .L3:
            sext.w  a0,a0
            ret
    .L2:
            rev8    a5,a5
            rev8    a4,a4
            sltu    a5,a5,a4
            neg     a5,a5
            ori     a0,a5,1
            j       .L3
    
    Diff first byte: 11 insns
    Diff second byte: 16 insns
    No diff: 11 insns
    
    This patch implements these improvements.
    
    The tests consist of an execution test (similar to
    gcc/testsuite/gcc.dg/torture/inline-mem-cmp-1.c) and a few tests
    that test the expansion conditions (known length and alignment).
    
    Similar to the cpymemsi expansion this patch does not introduce any
    gating for the cmpmemsi expansion (on top of requiring the known length,
    alignment and Zbb).
    
    Bootstrapped and SPEC CPU 2017 tested.
    
    gcc/ChangeLog:
    
            * config/riscv/riscv-protos.h (riscv_expand_block_compare): New
            prototype.
            * config/riscv/riscv-string.cc (GEN_EMIT_HELPER2): New helper
            for zero_extendhi.
            (do_load_from_addr): Add support for HI and SI/64 modes.
            (do_load): Add helper for zero-extended loads.
            (emit_memcmp_scalar_load_and_compare): New helper to emit memcmp.
            (emit_memcmp_scalar_result_calculation): Likewise.
            (riscv_expand_block_compare_scalar): Likewise.
            (riscv_expand_block_compare): New RISC-V expander for memory
            compare.
            * config/riscv/riscv.md (cmpmemsi): New cmpmem expansion.
    
    gcc/testsuite/ChangeLog:
    
            * gcc.target/riscv/cmpmemsi-1.c: New test.
            * gcc.target/riscv/cmpmemsi-2.c: New test.
            * gcc.target/riscv/cmpmemsi-3.c: New test.
            * gcc.target/riscv/cmpmemsi.c: New test.
    
    (cherry picked from commit 4bf1aa1ab90dd487fadc27c86523ec3562b2d2fe)

Diff:
---
 gcc/config/riscv/riscv-protos.h             |  1 +
 gcc/config/riscv/riscv-string.cc            | 40 +++++++++++++++++----------
 gcc/config/riscv/riscv.md                   | 15 ++++++++++
 gcc/testsuite/gcc.target/riscv/cmpmemsi-1.c |  6 ++++
 gcc/testsuite/gcc.target/riscv/cmpmemsi-2.c | 42 ++++++++++++++++++++++++++++
 gcc/testsuite/gcc.target/riscv/cmpmemsi-3.c | 43 +++++++++++++++++++++++++++++
 gcc/testsuite/gcc.target/riscv/cmpmemsi.c   | 22 +++++++++++++++
 7 files changed, 155 insertions(+), 14 deletions(-)

diff --git a/gcc/config/riscv/riscv-protos.h b/gcc/config/riscv/riscv-protos.h
index 5c8a52b78a2..565ead1382a 100644
--- a/gcc/config/riscv/riscv-protos.h
+++ b/gcc/config/riscv/riscv-protos.h
@@ -189,6 +189,7 @@ rtl_opt_pass * make_pass_avlprop (gcc::context *ctxt);
 rtl_opt_pass * make_pass_vsetvl (gcc::context *ctxt);
 
 /* Routines implemented in riscv-string.c.  */
+extern bool riscv_expand_block_compare (rtx, rtx, rtx, rtx);
 extern bool riscv_expand_block_move (rtx, rtx, rtx);
 extern bool riscv_expand_block_clear (rtx, rtx);
 
diff --git a/gcc/config/riscv/riscv-string.cc b/gcc/config/riscv/riscv-string.cc
index 96394844bbb..8f3b6f925e0 100644
--- a/gcc/config/riscv/riscv-string.cc
+++ b/gcc/config/riscv/riscv-string.cc
@@ -86,35 +86,47 @@ GEN_EMIT_HELPER2(th_rev) /* do_th_rev2  */
 GEN_EMIT_HELPER2(th_tstnbz) /* do_th_tstnbz2  */
 GEN_EMIT_HELPER3(xor) /* do_xor3  */
 GEN_EMIT_HELPER2(zero_extendqi) /* do_zero_extendqi2  */
+GEN_EMIT_HELPER2(zero_extendhi) /* do_zero_extendhi2  */
 
 #undef GEN_EMIT_HELPER2
 #undef GEN_EMIT_HELPER3
 
-/* Helper function to load a byte or a Pmode register.
+/* Helper function to emit zero-extended loads.
 
-   MODE is the mode to use for the load (QImode or Pmode).
+   MODE is the mode to use for the load.
    DEST is the destination register for the data.
-   ADDR_REG is the register that holds the address.
-   ADDR is the address expression to load from.
-
-   This function returns an rtx containing the register,
-   where the ADDR is stored.  */
+   MEM is the source to load from.  */
 
-static rtx
-do_load_from_addr (machine_mode mode, rtx dest, rtx addr_reg, rtx addr)
+static void
+do_load (machine_mode mode, rtx dest, rtx mem)
 {
-  rtx mem = gen_rtx_MEM (mode, addr_reg);
-  MEM_COPY_ATTRIBUTES (mem, addr);
-  set_mem_size (mem, GET_MODE_SIZE (mode));
-
   if (mode == QImode)
     do_zero_extendqi2 (dest, mem);
+  else if (mode == HImode)
+    do_zero_extendhi2 (dest, mem);
+  else if (mode == SImode && TARGET_64BIT)
+    emit_insn (gen_zero_extendsidi2 (dest, mem));
   else if (mode == Xmode)
     emit_move_insn (dest, mem);
   else
     gcc_unreachable ();
+}
+
+/* Helper function to emit zero-extended loads.
+
+   MODE is the mode to use for the load (QImode or Pmode).
+   DEST is the destination register for the data.
+   ADDR_REG is the register that holds the address.
+   ADDR is the address expression to load from.  */
+
+static void
+do_load_from_addr (machine_mode mode, rtx dest, rtx addr_reg, rtx addr)
+{
+  rtx mem = gen_rtx_MEM (mode, addr_reg);
+  MEM_COPY_ATTRIBUTES (mem, addr);
+  set_mem_size (mem, GET_MODE_SIZE (mode));
 
-  return addr_reg;
+  do_load (mode, dest, mem);
 }
 
 /* Generate a sequence to compare single characters in data1 and data2.
diff --git a/gcc/config/riscv/riscv.md b/gcc/config/riscv/riscv.md
index ee15c63db10..3cf16e77fdd 100644
--- a/gcc/config/riscv/riscv.md
+++ b/gcc/config/riscv/riscv.md
@@ -2635,6 +2635,21 @@
   DONE;
 })
 
+(define_expand "cmpmemsi"
+  [(parallel [(set (match_operand:SI 0)
+               (compare:SI (match_operand:BLK 1)
+                           (match_operand:BLK 2)))
+             (use (match_operand:SI 3))
+             (use (match_operand:SI 4))])]
+  "!optimize_size"
+{
+  if (riscv_expand_block_compare (operands[0], operands[1], operands[2],
+                                  operands[3]))
+    DONE;
+  else
+    FAIL;
+})
+
 (define_expand "cpymem<mode>"
   [(parallel [(set (match_operand:BLK 0 "general_operand")
                   (match_operand:BLK 1 "general_operand"))
diff --git a/gcc/testsuite/gcc.target/riscv/cmpmemsi-1.c b/gcc/testsuite/gcc.target/riscv/cmpmemsi-1.c
new file mode 100644
index 00000000000..d7e0bc47407
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/cmpmemsi-1.c
@@ -0,0 +1,6 @@
+/* { dg-do run } */
+/* { dg-options "-march=rv32gc_zbb -save-temps -g0 -fno-lto" { target { rv32 } } } */
+/* { dg-options "-march=rv64gc_zbb -save-temps -g0 -fno-lto" { target { rv64 } } } */
+/* { dg-timeout-factor 2 } */
+
+#include "../../gcc.dg/memcmp-1.c"
diff --git a/gcc/testsuite/gcc.target/riscv/cmpmemsi-2.c b/gcc/testsuite/gcc.target/riscv/cmpmemsi-2.c
new file mode 100644
index 00000000000..77aa88b5b9c
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/cmpmemsi-2.c
@@ -0,0 +1,42 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv32gc_zbb" { target { rv32 } } } */
+/* { dg-options "-march=rv64gc_zbb" { target { rv64 } } } */
+/* { dg-skip-if "" { *-*-* } { "-O0" "-Os" "-Og" "-Oz" } } */
+
+#include <stddef.h>
+#define aligned32 __attribute__ ((aligned (32)))
+
+const char myconst15[] aligned32 = { 1, 2, 3, 4, 5, 6, 7,
+                                    0, 1, 2, 3, 4, 5, 6, 7 };
+const char myconst23[] aligned32 = { 1, 2, 3, 4, 5, 6, 7,
+                                    0, 1, 2, 3, 4, 5, 6, 7,
+                                    0, 1, 2, 3, 4, 5, 6, 7 };
+const char myconst31[] aligned32 = { 1, 2, 3, 4, 5, 6, 7,
+                                    0, 1, 2, 3, 4, 5, 6, 7,
+                                    0, 1, 2, 3, 4, 5, 6, 7,
+                                    0, 1, 2, 3, 4, 5, 6, 7 };
+
+/* No expansion (unknown alignment) */
+#define MY_MEM_CMP_N(N)                                                \
+int my_mem_cmp_##N(const char *b1, const char *b2)             \
+{                                                              \
+  return __builtin_memcmp (b1, b2, N);                         \
+}
+
+/* No expansion (unknown alignment) */
+#define MY_MEM_CMP_CONST_N(N)                                  \
+int my_mem_cmp_const_##N(const char *b1)                       \
+{                                                              \
+  return __builtin_memcmp (b1, myconst##N, sizeof(myconst##N));        \
+}
+
+MY_MEM_CMP_N(15)
+MY_MEM_CMP_CONST_N(15)
+
+MY_MEM_CMP_N(23)
+MY_MEM_CMP_CONST_N(23)
+
+MY_MEM_CMP_N(31)
+MY_MEM_CMP_CONST_N(31)
+
+/* { dg-final { scan-assembler-times "\t(call|tail)\tmemcmp" 6 } } */
diff --git a/gcc/testsuite/gcc.target/riscv/cmpmemsi-3.c b/gcc/testsuite/gcc.target/riscv/cmpmemsi-3.c
new file mode 100644
index 00000000000..193cd4a343e
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/cmpmemsi-3.c
@@ -0,0 +1,43 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv32gc_zbb" { target { rv32 } } } */
+/* { dg-options "-march=rv64gc_zbb" { target { rv64 } } } */
+/* { dg-skip-if "" { *-*-* } { "-O0" "-Os" "-Og" "-Oz" } } */
+
+#include <stddef.h>
+#define aligned32 __attribute__ ((aligned (32)))
+
+const char myconst15[] aligned32 = { 1, 2, 3, 4, 5, 6, 7,
+                                    0, 1, 2, 3, 4, 5, 6, 7 };
+const char myconst23[] aligned32 = { 1, 2, 3, 4, 5, 6, 7,
+                                    0, 1, 2, 3, 4, 5, 6, 7,
+                                    0, 1, 2, 3, 4, 5, 6, 7 };
+const char myconst31[] aligned32 = { 1, 2, 3, 4, 5, 6, 7,
+                                    0, 1, 2, 3, 4, 5, 6, 7,
+                                    0, 1, 2, 3, 4, 5, 6, 7,
+                                    0, 1, 2, 3, 4, 5, 6, 7 };
+
+#define MY_MEM_CMP_ALIGNED_N(N)                                        \
+int my_mem_cmp_aligned_##N(const char *b1, const char *b2)     \
+{                                                              \
+  b1 = __builtin_assume_aligned (b1, 4096);                    \
+  b2 = __builtin_assume_aligned (b2, 4096);                    \
+  return __builtin_memcmp (b1, b2, N);                         \
+}
+
+#define MY_MEM_CMP_ALIGNED_CONST_N(N)                          \
+int my_mem_cmp_aligned_const_##N(const char *b1)               \
+{                                                              \
+  b1 = __builtin_assume_aligned (b1, 4096);                    \
+  return __builtin_memcmp (b1, myconst##N, sizeof(myconst##N));        \
+}
+
+MY_MEM_CMP_ALIGNED_N(15)
+MY_MEM_CMP_ALIGNED_CONST_N(15)
+
+MY_MEM_CMP_ALIGNED_N(23)
+MY_MEM_CMP_ALIGNED_CONST_N(23)
+
+MY_MEM_CMP_ALIGNED_N(31)
+MY_MEM_CMP_ALIGNED_CONST_N(31)
+
+/* { dg-final { scan-assembler-not "\t(call|tail)\tmemcmp" } } */
diff --git a/gcc/testsuite/gcc.target/riscv/cmpmemsi.c b/gcc/testsuite/gcc.target/riscv/cmpmemsi.c
new file mode 100644
index 00000000000..f4ccf269924
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/cmpmemsi.c
@@ -0,0 +1,22 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv32gc_zbb" { target { rv32 } } } */
+/* { dg-options "-march=rv64gc_zbb" { target { rv64 } } } */
+/* { dg-skip-if "" { *-*-* } { "-O0" "-Os" "-Og" "-Oz" } } */
+
+#include <stddef.h>
+
+/* No expansion (unknown size) */
+int my_mem_cmp_n(const char *b1, const char *b2, size_t n)
+{
+  return __builtin_memcmp (b1, b2, n);
+}
+
+/* No expansion (unknown size) */
+int my_mem_cmp_aligned(const char *b1, const char *b2, size_t n)
+{
+  b1 = __builtin_assume_aligned (b1, 4096);
+  b2 = __builtin_assume_aligned (b2, 4096);
+  return __builtin_memcmp (b1, b2, n);
+}
+
+/* { dg-final { scan-assembler-times "\t(call|tail)\tmemcmp" 2 } } */

Reply via email to