I know we're already in stage 3, so I'm basically asking for a review and am
fine with deferring this to GCC 17. But if it's acceptable for GCC 16, that
would be great too. :P

---

The previous reversed CRC implementation used explicit bit reflection
before and after the CRC computation:

  reflect(crc_init);
  reflect(data);
  for (int i = 0; i < data_bit_size / 8; i++)
    crc = (crc << 8) ^ table[(crc >> (crc_bit_size - 8))
                             ^ (data >> (data_bit_size - (i+1) * 8) & 0xFF)];
  reflect(crc);

This patch generates a reversed polynomial lookup table directly,
eliminating the need for bit reflection operations.  The new algorithm:

  for (int i = 0; i < data_bit_size / 8; i++)
    crc = (crc >> 8) ^ table[(crc ^ (data >> (i * 8))) & 0xFF];

This improves code generation for all targets using table-based reversed
CRC, as it removes the overhead of reflecting input data and CRC values.

Ref:

[1] "Reversing CRC - Theory and Practice"
  
https://sar.informatik.hu-berlin.de/research/publications/SAR-PR-2006-05/SAR-PR-2006-05_.pdf

gcc/ChangeLog:

        * expr.cc (calculate_reversed_crc): New function.
        (assemble_reversed_crc_table): New function.
        (generate_reversed_crc_table): New function.
        (calculate_table_based_reversed_CRC): New function.
        (expand_reversed_crc_table_based): Remove gen_reflecting_code
        parameter.  Use calculate_table_based_reversed_CRC.
        * expr.h (expand_reversed_crc_table_based): Update prototype.
        * builtins.cc (expand_builtin_crc_table_based): Update call.
        * internal-fn.cc (expand_crc_optab_fn): Update call.
        * config/aarch64/aarch64.md (crc_rev<ALLI:mode><ALLX:mode>4):
        Update call.
        * config/i386/i386.md (crc_rev<SWI124:mode>si4): Update call.
        * config/loongarch/loongarch.md (crc_rev<mode>si4): Update call.
        Remove local rbit lambda.
        * config/riscv/bitmanip.md (crc_rev<ANYI1:mode><ANYI:mode>4):
        Update call.  Remove TARGET_ZBKB case.
        * config/riscv/riscv.cc (generate_reflecting_code_using_brev):
        Remove.
        * config/riscv/riscv-protos.h (generate_reflecting_code_using_brev):
        Remove declaration.
---
 gcc/builtins.cc                   |   3 +-
 gcc/config/aarch64/aarch64.md     |   3 +-
 gcc/config/i386/i386.md           |   3 +-
 gcc/config/loongarch/loongarch.md |  17 +---
 gcc/config/riscv/bitmanip.md      |  13 +--
 gcc/config/riscv/riscv-protos.h   |   1 -
 gcc/config/riscv/riscv.cc         |  33 ------
 gcc/expr.cc                       | 160 ++++++++++++++++++++++++++----
 gcc/expr.h                        |   3 +-
 gcc/internal-fn.cc                |   3 +-
 10 files changed, 148 insertions(+), 91 deletions(-)

diff --git a/gcc/builtins.cc b/gcc/builtins.cc
index bbe181a5128..4f4694f2e2d 100644
--- a/gcc/builtins.cc
+++ b/gcc/builtins.cc
@@ -7828,8 +7828,7 @@ expand_builtin_crc_table_based (internal_fn fn, 
scalar_mode crc_mode,
   else
     /* If it's IFN_CRC_REV generate bit-reversed CRC.  */
     expand_reversed_crc_table_based (target, op1, op2, op3,
-                                    data_mode,
-                                    generate_reflecting_code_standard);
+                                    data_mode);
   return target;
 }
 
diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md
index de6b1d0ed06..1171ed8258b 100644
--- a/gcc/config/aarch64/aarch64.md
+++ b/gcc/config/aarch64/aarch64.md
@@ -4878,8 +4878,7 @@ (define_expand "crc_rev<ALLI:mode><ALLX:mode>4"
     else
       /* Otherwise, generate table-based CRC.  */
       expand_reversed_crc_table_based (operands[0], operands[1], operands[2],
-                                      operands[3], <ALLI:MODE>mode,
-                                      generate_reflecting_code_standard);
+                                      operands[3], <ALLI:MODE>mode);
     DONE;
   }
 )
diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md
index 6af7dcfcdd3..efb7c768ca3 100644
--- a/gcc/config/i386/i386.md
+++ b/gcc/config/i386/i386.md
@@ -29719,8 +29719,7 @@ (define_expand "crc_rev<SWI124:mode>si4"
     emit_insn (gen_sse4_2_crc32<mode> (operands[0], operands[1], operands[2]));
   else
     expand_reversed_crc_table_based (operands[0], operands[1], operands[2],
-                                    operands[3], <SWI124:MODE>mode,
-                                    generate_reflecting_code_standard);
+                                    operands[3], <SWI124:MODE>mode);
   DONE;
 })
 
diff --git a/gcc/config/loongarch/loongarch.md 
b/gcc/config/loongarch/loongarch.md
index 763d514cac7..bfe99877aa1 100644
--- a/gcc/config/loongarch/loongarch.md
+++ b/gcc/config/loongarch/loongarch.md
@@ -4687,23 +4687,8 @@ (define_expand "crc_rev<mode>si4"
       }
     else
       {
-       /* No CRC instruction is suitable, use the generic table-based
-          implementation but optimize bit reversion.  */
-       auto rbit = [](rtx *r)
-         {
-           /* Well, this is ugly.  The problem is
-              expand_reversed_crc_table_based only accepts one helper
-              for reversing data elements and CRC states.  */
-           auto mode = GET_MODE (*r);
-           auto rbit = (mode == <MODE>mode ? gen_rbit<mode> : gen_rbitsi);
-           rtx out = gen_reg_rtx (mode);
-
-           emit_insn (rbit (out, *r));
-           *r = out;
-         };
        expand_reversed_crc_table_based (operands[0], operands[1],
-                                        msg, operands[3], <MODE>mode,
-                                        rbit);
+                                        msg, operands[3], <MODE>mode);
       }
     DONE;
   })
diff --git a/gcc/config/riscv/bitmanip.md b/gcc/config/riscv/bitmanip.md
index 166ddd9db9e..9e63e48beed 100644
--- a/gcc/config/riscv/bitmanip.md
+++ b/gcc/config/riscv/bitmanip.md
@@ -1234,25 +1234,16 @@ (define_expand "crc_rev<ANYI1:mode><ANYI:mode>4"
      it is possible to store the quotient within a single variable
      (E.g.  CRC64's quotient may need 65 bits,
      we can't keep it in 64 bit variable.)
-     then use clmul instruction to implement the CRC,
-     otherwise (TARGET_ZBKB) generate table based using brev.  */
+     then use clmul instruction to implement the CRC.  */
   if ((TARGET_ZBKC || TARGET_ZBC || TARGET_ZVBC) && <ANYI:MODE>mode < 
word_mode)
     expand_reversed_crc_using_clmul (<ANYI:MODE>mode, <ANYI1:MODE>mode,
                                     operands);
-  else if (TARGET_ZBKB)
-    /* Generate table-based CRC.
-       To reflect values use brev and bswap instructions.  */
-    expand_reversed_crc_table_based (operands[0], operands[1],
-                                    operands[2], operands[3],
-                                    GET_MODE (operands[2]),
-                                    generate_reflecting_code_using_brev);
   else
     /* Generate table-based CRC.
        To reflect values use standard reflecting algorithm.  */
     expand_reversed_crc_table_based (operands[0], operands[1],
                                     operands[2], operands[3],
-                                    GET_MODE (operands[2]),
-                                    generate_reflecting_code_standard);
+                                    GET_MODE (operands[2]));
   DONE;
 })
 
diff --git a/gcc/config/riscv/riscv-protos.h b/gcc/config/riscv/riscv-protos.h
index a372779cf9f..199006c4cb5 100644
--- a/gcc/config/riscv/riscv-protos.h
+++ b/gcc/config/riscv/riscv-protos.h
@@ -181,7 +181,6 @@ extern bool riscv_reg_frame_related (rtx);
 extern void riscv_split_sum_of_two_s12 (HOST_WIDE_INT, HOST_WIDE_INT *,
                                        HOST_WIDE_INT *);
 extern bool riscv_vector_float_type_p (const_tree type);
-extern void generate_reflecting_code_using_brev (rtx *);
 extern void expand_crc_using_clmul (scalar_mode, scalar_mode, rtx *);
 extern void expand_reversed_crc_using_clmul (scalar_mode, scalar_mode, rtx *);
 
diff --git a/gcc/config/riscv/riscv.cc b/gcc/config/riscv/riscv.cc
index 2d14b3c92f5..c6883b53f4f 100644
--- a/gcc/config/riscv/riscv.cc
+++ b/gcc/config/riscv/riscv.cc
@@ -15339,39 +15339,6 @@ riscv_use_by_pieces_infrastructure_p (unsigned 
HOST_WIDE_INT size,
   return default_use_by_pieces_infrastructure_p (size, alignment, op, speed_p);
 }
 
-/* Generate instruction sequence
-   which reflects the value of the OP using bswap and brev8 instructions.
-   OP's mode may be less than word_mode, to get the correct number,
-   after reflecting we shift right the value by SHIFT_VAL.
-   E.g. we have 1111 0001, after reflection (target 32-bit) we will get
-   1000 1111 0000 0000, if we shift-out 16 bits,
-   we will get the desired one: 1000 1111.  */
-
-void
-generate_reflecting_code_using_brev (rtx *op_p)
-{
-  rtx op = *op_p;
-  machine_mode op_mode = GET_MODE (op);
-
-  /* OP may be smaller than a word.  We can use a paradoxical subreg
-     to compensate for that.  It should never be larger than a word
-     for RISC-V.  */
-  gcc_assert (op_mode <= word_mode);
-  if (op_mode != word_mode)
-    op = gen_lowpart (word_mode, op);
-
-  HOST_WIDE_INT shift_val = (BITS_PER_WORD
-                            - GET_MODE_BITSIZE (op_mode).to_constant ());
-  riscv_expand_op (BSWAP, word_mode, op, op, op);
-  riscv_expand_op (LSHIFTRT, word_mode, op, op,
-                  gen_int_mode (shift_val, word_mode));
-  if (TARGET_64BIT)
-    emit_insn (gen_riscv_brev8_di (op, op));
-  else
-    emit_insn (gen_riscv_brev8_si (op, op));
-}
-
-
 /* Generate assembly to calculate CRC using clmul instruction.
    The following code will be generated when the CRC and data sizes are equal:
      li      a4,quotient
diff --git a/gcc/expr.cc b/gcc/expr.cc
index 7d84ad9e6fc..bfbcf0bfb08 100644
--- a/gcc/expr.cc
+++ b/gcc/expr.cc
@@ -14485,6 +14485,75 @@ generate_crc_table (unsigned HOST_WIDE_INT polynom, 
unsigned short crc_bits)
   return assemble_crc_table (polynom, crc_bits);
 }
 
+/* Calculate CRC for the initial CRC and given POLYNOMIAL.
+   CRC_BITS is CRC size.  */
+
+static unsigned HOST_WIDE_INT
+calculate_reversed_crc (unsigned HOST_WIDE_INT crc,
+                       unsigned HOST_WIDE_INT polynomial,
+                       unsigned short crc_bits)
+{
+  unsigned HOST_WIDE_INT rev_polynom = reflect_hwi (polynomial, crc_bits);
+  for (int j = 0; j < 8; j++)
+    {
+      if (crc & 1)
+       crc = (crc >> 1) ^ rev_polynom;
+      else
+       crc >>= 1;
+    }
+  /* Zero out bits in crc beyond the specified number of crc_bits.  */
+  if (crc_bits < sizeof (crc) * CHAR_BIT)
+    crc &= (HOST_WIDE_INT_1U << crc_bits) - 1;
+  return crc;
+}
+
+/* Assemble CRC table with 256 elements for the given POLYNOM and CRC_BITS.
+   POLYNOM is the polynomial used to calculate the CRC table's elements.
+   CRC_BITS is the size of CRC, may be 8, 16, ... . */
+
+static rtx
+assemble_reversed_crc_table (unsigned HOST_WIDE_INT polynom, unsigned short 
crc_bits)
+{
+  unsigned table_el_n = 0x100;
+  tree ar = build_array_type (make_unsigned_type (crc_bits),
+                             build_index_type (size_int (table_el_n - 1)));
+
+  /* Initialize the table.  */
+  vec<tree, va_gc> *initial_values;
+  vec_alloc (initial_values, table_el_n);
+  for (size_t i = 0; i < table_el_n; ++i)
+    {
+      unsigned HOST_WIDE_INT crc = calculate_reversed_crc (i, polynom, 
crc_bits);
+      tree element = build_int_cstu (make_unsigned_type (crc_bits), crc);
+      vec_safe_push (initial_values, element);
+    }
+  tree ctor = build_constructor_from_vec (ar, initial_values);
+  rtx mem = output_constant_def (ctor, 1);
+  gcc_assert (MEM_P (mem));
+  if (dump_file && (dump_flags & TDF_DETAILS))
+    {
+      fprintf (dump_file,
+              ";; emitting reversed crc table crc_%u_polynomial_"
+              HOST_WIDE_INT_PRINT_HEX " ",
+              crc_bits, polynom);
+      print_rtl_single (dump_file, XEXP (mem, 0));
+      fprintf (dump_file, "\n");
+    }
+
+  return XEXP (mem, 0);
+}
+
+/* Generate reversed CRC table for the given POLYNOM and CRC_BITS.  */
+
+static rtx
+generate_reversed_crc_table (unsigned HOST_WIDE_INT polynom,
+                            unsigned short crc_bits)
+{
+  gcc_assert (crc_bits <= 64);
+
+  return assemble_reversed_crc_table (polynom, crc_bits);
+}
+
 /* Generate table-based CRC code for the given CRC, INPUT_DATA and the
    POLYNOMIAL (without leading 1).
 
@@ -14564,6 +14633,68 @@ calculate_table_based_CRC (rtx *crc, const rtx 
&input_data,
     }
 }
 
+/* Generate table-based reversed CRC code for the given CRC, INPUT_DATA
+   and the POLYNOMIAL (without leading 1).
+
+   This function generates code for reversed (bit-reflected) CRC calculation
+   using a pre-computed lookup table.  Unlike the standard CRC calculation,
+   this processes data from LSB to MSB, eliminating the need for explicit
+   bit reflection before and after the CRC computation.  */
+
+static void
+calculate_table_based_reversed_CRC (rtx *crc, const rtx &input_data,
+                                   const rtx &polynomial,
+                                   machine_mode data_mode)
+{
+  machine_mode mode = GET_MODE (*crc);
+  unsigned short crc_bit_size = GET_MODE_BITSIZE (mode).to_constant ();
+  unsigned short data_size = GET_MODE_SIZE (data_mode).to_constant ();
+  rtx tab = generate_reversed_crc_table (UINTVAL (polynomial), crc_bit_size);
+
+  for (unsigned short i = 0; i < data_size; i++)
+    {
+      *crc = force_reg (mode, *crc);
+
+      /* data >> (8 * i).  */
+      unsigned range_8 = 8 * i;
+      /* CRC's mode is always at least as wide as INPUT_DATA.  Convert
+        INPUT_DATA into CRC's mode.  */
+      rtx data = gen_reg_rtx (mode);
+      convert_move (data, input_data, 1);
+      data = expand_shift (RSHIFT_EXPR, mode, data, range_8, NULL_RTX, 1);
+
+      /* crc ^ (data >> (8 * i)).  */
+      rtx in = expand_binop (mode, xor_optab, *crc, data,
+                            NULL_RTX, 1, OPTAB_WIDEN);
+
+      /* (crc ^ data) & 0xFF.  */
+      rtx index = expand_and (mode, in, gen_int_mode (255, mode),
+                             NULL_RTX);
+      int log_crc_size = exact_log2 (GET_MODE_SIZE (mode).to_constant ());
+      index = expand_shift (LSHIFT_EXPR, mode, index,
+                           log_crc_size, NULL_RTX, 0);
+
+      rtx addr = gen_reg_rtx (Pmode);
+      convert_move (addr, index, 1);
+      addr = expand_binop (Pmode, add_optab, addr, tab, NULL_RTX,
+                           0, OPTAB_DIRECT);
+
+      /* crc_table[(crc ^ data) & 0xFF].  */
+      rtx tab_el = validize_mem (gen_rtx_MEM (mode, addr));
+
+      /* (crc >> 8) if CRC is larger than 8, otherwise 0.  */
+      rtx high = NULL_RTX;
+      if (crc_bit_size != 8)
+       high = expand_shift (RSHIFT_EXPR, mode, *crc, 8, NULL_RTX, 1);
+      else
+       high = gen_int_mode (0, mode);
+
+      /* crc = (crc >> 8) ^ crc_table[(crc ^ data) & 0xFF].  */
+      *crc = expand_binop (mode, xor_optab, tab_el, high, NULL_RTX, 1,
+                          OPTAB_WIDEN);
+    }
+}
+
 /* Generate table-based CRC code for the given CRC, INPUT_DATA and the
    POLYNOMIAL (without leading 1).
 
@@ -14701,41 +14832,30 @@ generate_reflecting_code_standard (rtx *op)
    the POLYNOMIAL (without leading 1).
 
    CRC is OP1, data is OP2 and the polynomial is OP3.
-   This must generate CRC table and assembly for the following code,
+   This generates a reversed CRC table and assembly for the following code,
    where crc_bit_size and data_bit_size may be 8, 16, 32, 64:
    uint_crc_bit_size_t
    crc_crc_bit_size (uint_crc_bit_size_t crc_init,
-                          uint_data_bit_size_t data, size_t size)
+                    uint_data_bit_size_t data)
    {
-     reflect (crc_init)
      uint_crc_bit_size_t crc = crc_init;
-     reflect (data);
      for (int i = 0; i < data_bit_size / 8; i++)
-       crc = (crc << 8) ^ crc_table[(crc >> (crc_bit_size - 8))
-                         ^ (data >> (data_bit_size - (i + 1) * 8) & 0xFF))];
-     reflect (crc);
+       crc = (crc >> 8) ^ crc_table[(crc ^ (data >> (i * 8))) & 0xFF];
      return crc;
-   }  */
+   }
+
+   This approach uses a pre-computed reversed polynomial table, eliminating
+   the need for explicit bit reflection before and after the CRC computation.  
*/
 
 void
 expand_reversed_crc_table_based (rtx op0, rtx op1, rtx op2, rtx op3,
-                                machine_mode data_mode,
-                                void (*gen_reflecting_code) (rtx *op))
+                                machine_mode data_mode)
 {
   gcc_assert (!CONST_INT_P (op0));
   gcc_assert (CONST_INT_P (op3));
   machine_mode crc_mode = GET_MODE (op0);
-
   rtx crc = gen_reg_rtx (crc_mode);
   convert_move (crc, op1, 0);
-  gen_reflecting_code (&crc);
-
-  rtx data = gen_reg_rtx (data_mode);
-  convert_move (data, op2, 0);
-  gen_reflecting_code (&data);
-
-  calculate_table_based_CRC (&crc, data, op3, data_mode);
-
-  gen_reflecting_code (&crc);
+  calculate_table_based_reversed_CRC (&crc, op2, op3, data_mode);
   convert_move (op0, crc, 0);
 }
diff --git a/gcc/expr.h b/gcc/expr.h
index 060151df010..40627c4f113 100644
--- a/gcc/expr.h
+++ b/gcc/expr.h
@@ -385,8 +385,7 @@ gf2n_poly_long_div_quotient (unsigned HOST_WIDE_INT, 
unsigned short);
 /* Generate table-based CRC.  */
 extern void generate_reflecting_code_standard (rtx *);
 extern void expand_crc_table_based (rtx, rtx, rtx, rtx, machine_mode);
-extern void expand_reversed_crc_table_based (rtx, rtx, rtx, rtx, machine_mode,
-                                            void (*) (rtx *));
+extern void expand_reversed_crc_table_based (rtx, rtx, rtx, rtx, machine_mode);
 
 /* Cache of the "extended" flag in the target's _BitInt description
    for use during expand.  */
diff --git a/gcc/internal-fn.cc b/gcc/internal-fn.cc
index 13fbd2ce788..acad06db8d7 100644
--- a/gcc/internal-fn.cc
+++ b/gcc/internal-fn.cc
@@ -4087,8 +4087,7 @@ expand_crc_optab_fn (internal_fn fn, gcall *stmt, 
convert_optab optab)
       else
        /* If it's IFN_CRC_REV generate bit-reversed CRC.  */
        expand_reversed_crc_table_based (dest, crc, data, polynomial,
-                                        TYPE_MODE (data_type),
-                                        generate_reflecting_code_standard);
+                                        TYPE_MODE (data_type));
 
       /* Now get the return value where it needs to be, taking care to
         ensure it's promoted appropriately if the ABI demands it.
-- 
2.34.1

Reply via email to