I know we're already in stage 3, so I'm basically asking for a review and am
fine with deferring this to GCC 17. But if it's acceptable for GCC 16, that
would be great too. :P
---
The previous reversed CRC implementation used explicit bit reflection
before and after the CRC computation:
reflect(crc_init);
reflect(data);
for (int i = 0; i < data_bit_size / 8; i++)
crc = (crc << 8) ^ table[(crc >> (crc_bit_size - 8))
^ (data >> (data_bit_size - (i+1) * 8) & 0xFF)];
reflect(crc);
This patch generates a reversed polynomial lookup table directly,
eliminating the need for bit reflection operations. The new algorithm:
for (int i = 0; i < data_bit_size / 8; i++)
crc = (crc >> 8) ^ table[(crc ^ (data >> (i * 8))) & 0xFF];
This improves code generation for all targets using table-based reversed
CRC, as it removes the overhead of reflecting input data and CRC values.
Ref:
[1] "Reversing CRC - Theory and Practice"
https://sar.informatik.hu-berlin.de/research/publications/SAR-PR-2006-05/SAR-PR-2006-05_.pdf
gcc/ChangeLog:
* expr.cc (calculate_reversed_crc): New function.
(assemble_reversed_crc_table): New function.
(generate_reversed_crc_table): New function.
(calculate_table_based_reversed_CRC): New function.
(expand_reversed_crc_table_based): Remove gen_reflecting_code
parameter. Use calculate_table_based_reversed_CRC.
* expr.h (expand_reversed_crc_table_based): Update prototype.
* builtins.cc (expand_builtin_crc_table_based): Update call.
* internal-fn.cc (expand_crc_optab_fn): Update call.
* config/aarch64/aarch64.md (crc_rev<ALLI:mode><ALLX:mode>4):
Update call.
* config/i386/i386.md (crc_rev<SWI124:mode>si4): Update call.
* config/loongarch/loongarch.md (crc_rev<mode>si4): Update call.
Remove local rbit lambda.
* config/riscv/bitmanip.md (crc_rev<ANYI1:mode><ANYI:mode>4):
Update call. Remove TARGET_ZBKB case.
* config/riscv/riscv.cc (generate_reflecting_code_using_brev):
Remove.
* config/riscv/riscv-protos.h (generate_reflecting_code_using_brev):
Remove declaration.
---
gcc/builtins.cc | 3 +-
gcc/config/aarch64/aarch64.md | 3 +-
gcc/config/i386/i386.md | 3 +-
gcc/config/loongarch/loongarch.md | 17 +---
gcc/config/riscv/bitmanip.md | 13 +--
gcc/config/riscv/riscv-protos.h | 1 -
gcc/config/riscv/riscv.cc | 33 ------
gcc/expr.cc | 160 ++++++++++++++++++++++++++----
gcc/expr.h | 3 +-
gcc/internal-fn.cc | 3 +-
10 files changed, 148 insertions(+), 91 deletions(-)
diff --git a/gcc/builtins.cc b/gcc/builtins.cc
index bbe181a5128..4f4694f2e2d 100644
--- a/gcc/builtins.cc
+++ b/gcc/builtins.cc
@@ -7828,8 +7828,7 @@ expand_builtin_crc_table_based (internal_fn fn,
scalar_mode crc_mode,
else
/* If it's IFN_CRC_REV generate bit-reversed CRC. */
expand_reversed_crc_table_based (target, op1, op2, op3,
- data_mode,
- generate_reflecting_code_standard);
+ data_mode);
return target;
}
diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md
index de6b1d0ed06..1171ed8258b 100644
--- a/gcc/config/aarch64/aarch64.md
+++ b/gcc/config/aarch64/aarch64.md
@@ -4878,8 +4878,7 @@ (define_expand "crc_rev<ALLI:mode><ALLX:mode>4"
else
/* Otherwise, generate table-based CRC. */
expand_reversed_crc_table_based (operands[0], operands[1], operands[2],
- operands[3], <ALLI:MODE>mode,
- generate_reflecting_code_standard);
+ operands[3], <ALLI:MODE>mode);
DONE;
}
)
diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md
index 6af7dcfcdd3..efb7c768ca3 100644
--- a/gcc/config/i386/i386.md
+++ b/gcc/config/i386/i386.md
@@ -29719,8 +29719,7 @@ (define_expand "crc_rev<SWI124:mode>si4"
emit_insn (gen_sse4_2_crc32<mode> (operands[0], operands[1], operands[2]));
else
expand_reversed_crc_table_based (operands[0], operands[1], operands[2],
- operands[3], <SWI124:MODE>mode,
- generate_reflecting_code_standard);
+ operands[3], <SWI124:MODE>mode);
DONE;
})
diff --git a/gcc/config/loongarch/loongarch.md
b/gcc/config/loongarch/loongarch.md
index 763d514cac7..bfe99877aa1 100644
--- a/gcc/config/loongarch/loongarch.md
+++ b/gcc/config/loongarch/loongarch.md
@@ -4687,23 +4687,8 @@ (define_expand "crc_rev<mode>si4"
}
else
{
- /* No CRC instruction is suitable, use the generic table-based
- implementation but optimize bit reversion. */
- auto rbit = [](rtx *r)
- {
- /* Well, this is ugly. The problem is
- expand_reversed_crc_table_based only accepts one helper
- for reversing data elements and CRC states. */
- auto mode = GET_MODE (*r);
- auto rbit = (mode == <MODE>mode ? gen_rbit<mode> : gen_rbitsi);
- rtx out = gen_reg_rtx (mode);
-
- emit_insn (rbit (out, *r));
- *r = out;
- };
expand_reversed_crc_table_based (operands[0], operands[1],
- msg, operands[3], <MODE>mode,
- rbit);
+ msg, operands[3], <MODE>mode);
}
DONE;
})
diff --git a/gcc/config/riscv/bitmanip.md b/gcc/config/riscv/bitmanip.md
index 166ddd9db9e..9e63e48beed 100644
--- a/gcc/config/riscv/bitmanip.md
+++ b/gcc/config/riscv/bitmanip.md
@@ -1234,25 +1234,16 @@ (define_expand "crc_rev<ANYI1:mode><ANYI:mode>4"
it is possible to store the quotient within a single variable
(E.g. CRC64's quotient may need 65 bits,
we can't keep it in 64 bit variable.)
- then use clmul instruction to implement the CRC,
- otherwise (TARGET_ZBKB) generate table based using brev. */
+ then use clmul instruction to implement the CRC. */
if ((TARGET_ZBKC || TARGET_ZBC || TARGET_ZVBC) && <ANYI:MODE>mode <
word_mode)
expand_reversed_crc_using_clmul (<ANYI:MODE>mode, <ANYI1:MODE>mode,
operands);
- else if (TARGET_ZBKB)
- /* Generate table-based CRC.
- To reflect values use brev and bswap instructions. */
- expand_reversed_crc_table_based (operands[0], operands[1],
- operands[2], operands[3],
- GET_MODE (operands[2]),
- generate_reflecting_code_using_brev);
else
/* Generate table-based CRC.
To reflect values use standard reflecting algorithm. */
expand_reversed_crc_table_based (operands[0], operands[1],
operands[2], operands[3],
- GET_MODE (operands[2]),
- generate_reflecting_code_standard);
+ GET_MODE (operands[2]));
DONE;
})
diff --git a/gcc/config/riscv/riscv-protos.h b/gcc/config/riscv/riscv-protos.h
index a372779cf9f..199006c4cb5 100644
--- a/gcc/config/riscv/riscv-protos.h
+++ b/gcc/config/riscv/riscv-protos.h
@@ -181,7 +181,6 @@ extern bool riscv_reg_frame_related (rtx);
extern void riscv_split_sum_of_two_s12 (HOST_WIDE_INT, HOST_WIDE_INT *,
HOST_WIDE_INT *);
extern bool riscv_vector_float_type_p (const_tree type);
-extern void generate_reflecting_code_using_brev (rtx *);
extern void expand_crc_using_clmul (scalar_mode, scalar_mode, rtx *);
extern void expand_reversed_crc_using_clmul (scalar_mode, scalar_mode, rtx *);
diff --git a/gcc/config/riscv/riscv.cc b/gcc/config/riscv/riscv.cc
index 2d14b3c92f5..c6883b53f4f 100644
--- a/gcc/config/riscv/riscv.cc
+++ b/gcc/config/riscv/riscv.cc
@@ -15339,39 +15339,6 @@ riscv_use_by_pieces_infrastructure_p (unsigned
HOST_WIDE_INT size,
return default_use_by_pieces_infrastructure_p (size, alignment, op, speed_p);
}
-/* Generate instruction sequence
- which reflects the value of the OP using bswap and brev8 instructions.
- OP's mode may be less than word_mode, to get the correct number,
- after reflecting we shift right the value by SHIFT_VAL.
- E.g. we have 1111 0001, after reflection (target 32-bit) we will get
- 1000 1111 0000 0000, if we shift-out 16 bits,
- we will get the desired one: 1000 1111. */
-
-void
-generate_reflecting_code_using_brev (rtx *op_p)
-{
- rtx op = *op_p;
- machine_mode op_mode = GET_MODE (op);
-
- /* OP may be smaller than a word. We can use a paradoxical subreg
- to compensate for that. It should never be larger than a word
- for RISC-V. */
- gcc_assert (op_mode <= word_mode);
- if (op_mode != word_mode)
- op = gen_lowpart (word_mode, op);
-
- HOST_WIDE_INT shift_val = (BITS_PER_WORD
- - GET_MODE_BITSIZE (op_mode).to_constant ());
- riscv_expand_op (BSWAP, word_mode, op, op, op);
- riscv_expand_op (LSHIFTRT, word_mode, op, op,
- gen_int_mode (shift_val, word_mode));
- if (TARGET_64BIT)
- emit_insn (gen_riscv_brev8_di (op, op));
- else
- emit_insn (gen_riscv_brev8_si (op, op));
-}
-
-
/* Generate assembly to calculate CRC using clmul instruction.
The following code will be generated when the CRC and data sizes are equal:
li a4,quotient
diff --git a/gcc/expr.cc b/gcc/expr.cc
index 7d84ad9e6fc..bfbcf0bfb08 100644
--- a/gcc/expr.cc
+++ b/gcc/expr.cc
@@ -14485,6 +14485,75 @@ generate_crc_table (unsigned HOST_WIDE_INT polynom,
unsigned short crc_bits)
return assemble_crc_table (polynom, crc_bits);
}
+/* Calculate CRC for the initial CRC and given POLYNOMIAL.
+ CRC_BITS is CRC size. */
+
+static unsigned HOST_WIDE_INT
+calculate_reversed_crc (unsigned HOST_WIDE_INT crc,
+ unsigned HOST_WIDE_INT polynomial,
+ unsigned short crc_bits)
+{
+ unsigned HOST_WIDE_INT rev_polynom = reflect_hwi (polynomial, crc_bits);
+ for (int j = 0; j < 8; j++)
+ {
+ if (crc & 1)
+ crc = (crc >> 1) ^ rev_polynom;
+ else
+ crc >>= 1;
+ }
+ /* Zero out bits in crc beyond the specified number of crc_bits. */
+ if (crc_bits < sizeof (crc) * CHAR_BIT)
+ crc &= (HOST_WIDE_INT_1U << crc_bits) - 1;
+ return crc;
+}
+
+/* Assemble CRC table with 256 elements for the given POLYNOM and CRC_BITS.
+ POLYNOM is the polynomial used to calculate the CRC table's elements.
+ CRC_BITS is the size of CRC, may be 8, 16, ... . */
+
+static rtx
+assemble_reversed_crc_table (unsigned HOST_WIDE_INT polynom, unsigned short
crc_bits)
+{
+ unsigned table_el_n = 0x100;
+ tree ar = build_array_type (make_unsigned_type (crc_bits),
+ build_index_type (size_int (table_el_n - 1)));
+
+ /* Initialize the table. */
+ vec<tree, va_gc> *initial_values;
+ vec_alloc (initial_values, table_el_n);
+ for (size_t i = 0; i < table_el_n; ++i)
+ {
+ unsigned HOST_WIDE_INT crc = calculate_reversed_crc (i, polynom,
crc_bits);
+ tree element = build_int_cstu (make_unsigned_type (crc_bits), crc);
+ vec_safe_push (initial_values, element);
+ }
+ tree ctor = build_constructor_from_vec (ar, initial_values);
+ rtx mem = output_constant_def (ctor, 1);
+ gcc_assert (MEM_P (mem));
+ if (dump_file && (dump_flags & TDF_DETAILS))
+ {
+ fprintf (dump_file,
+ ";; emitting reversed crc table crc_%u_polynomial_"
+ HOST_WIDE_INT_PRINT_HEX " ",
+ crc_bits, polynom);
+ print_rtl_single (dump_file, XEXP (mem, 0));
+ fprintf (dump_file, "\n");
+ }
+
+ return XEXP (mem, 0);
+}
+
+/* Generate reversed CRC table for the given POLYNOM and CRC_BITS. */
+
+static rtx
+generate_reversed_crc_table (unsigned HOST_WIDE_INT polynom,
+ unsigned short crc_bits)
+{
+ gcc_assert (crc_bits <= 64);
+
+ return assemble_reversed_crc_table (polynom, crc_bits);
+}
+
/* Generate table-based CRC code for the given CRC, INPUT_DATA and the
POLYNOMIAL (without leading 1).
@@ -14564,6 +14633,68 @@ calculate_table_based_CRC (rtx *crc, const rtx
&input_data,
}
}
+/* Generate table-based reversed CRC code for the given CRC, INPUT_DATA
+ and the POLYNOMIAL (without leading 1).
+
+ This function generates code for reversed (bit-reflected) CRC calculation
+ using a pre-computed lookup table. Unlike the standard CRC calculation,
+ this processes data from LSB to MSB, eliminating the need for explicit
+ bit reflection before and after the CRC computation. */
+
+static void
+calculate_table_based_reversed_CRC (rtx *crc, const rtx &input_data,
+ const rtx &polynomial,
+ machine_mode data_mode)
+{
+ machine_mode mode = GET_MODE (*crc);
+ unsigned short crc_bit_size = GET_MODE_BITSIZE (mode).to_constant ();
+ unsigned short data_size = GET_MODE_SIZE (data_mode).to_constant ();
+ rtx tab = generate_reversed_crc_table (UINTVAL (polynomial), crc_bit_size);
+
+ for (unsigned short i = 0; i < data_size; i++)
+ {
+ *crc = force_reg (mode, *crc);
+
+ /* data >> (8 * i). */
+ unsigned range_8 = 8 * i;
+ /* CRC's mode is always at least as wide as INPUT_DATA. Convert
+ INPUT_DATA into CRC's mode. */
+ rtx data = gen_reg_rtx (mode);
+ convert_move (data, input_data, 1);
+ data = expand_shift (RSHIFT_EXPR, mode, data, range_8, NULL_RTX, 1);
+
+ /* crc ^ (data >> (8 * i)). */
+ rtx in = expand_binop (mode, xor_optab, *crc, data,
+ NULL_RTX, 1, OPTAB_WIDEN);
+
+ /* (crc ^ data) & 0xFF. */
+ rtx index = expand_and (mode, in, gen_int_mode (255, mode),
+ NULL_RTX);
+ int log_crc_size = exact_log2 (GET_MODE_SIZE (mode).to_constant ());
+ index = expand_shift (LSHIFT_EXPR, mode, index,
+ log_crc_size, NULL_RTX, 0);
+
+ rtx addr = gen_reg_rtx (Pmode);
+ convert_move (addr, index, 1);
+ addr = expand_binop (Pmode, add_optab, addr, tab, NULL_RTX,
+ 0, OPTAB_DIRECT);
+
+ /* crc_table[(crc ^ data) & 0xFF]. */
+ rtx tab_el = validize_mem (gen_rtx_MEM (mode, addr));
+
+ /* (crc >> 8) if CRC is larger than 8, otherwise 0. */
+ rtx high = NULL_RTX;
+ if (crc_bit_size != 8)
+ high = expand_shift (RSHIFT_EXPR, mode, *crc, 8, NULL_RTX, 1);
+ else
+ high = gen_int_mode (0, mode);
+
+ /* crc = (crc >> 8) ^ crc_table[(crc ^ data) & 0xFF]. */
+ *crc = expand_binop (mode, xor_optab, tab_el, high, NULL_RTX, 1,
+ OPTAB_WIDEN);
+ }
+}
+
/* Generate table-based CRC code for the given CRC, INPUT_DATA and the
POLYNOMIAL (without leading 1).
@@ -14701,41 +14832,30 @@ generate_reflecting_code_standard (rtx *op)
the POLYNOMIAL (without leading 1).
CRC is OP1, data is OP2 and the polynomial is OP3.
- This must generate CRC table and assembly for the following code,
+ This generates a reversed CRC table and assembly for the following code,
where crc_bit_size and data_bit_size may be 8, 16, 32, 64:
uint_crc_bit_size_t
crc_crc_bit_size (uint_crc_bit_size_t crc_init,
- uint_data_bit_size_t data, size_t size)
+ uint_data_bit_size_t data)
{
- reflect (crc_init)
uint_crc_bit_size_t crc = crc_init;
- reflect (data);
for (int i = 0; i < data_bit_size / 8; i++)
- crc = (crc << 8) ^ crc_table[(crc >> (crc_bit_size - 8))
- ^ (data >> (data_bit_size - (i + 1) * 8) & 0xFF))];
- reflect (crc);
+ crc = (crc >> 8) ^ crc_table[(crc ^ (data >> (i * 8))) & 0xFF];
return crc;
- } */
+ }
+
+ This approach uses a pre-computed reversed polynomial table, eliminating
+ the need for explicit bit reflection before and after the CRC computation.
*/
void
expand_reversed_crc_table_based (rtx op0, rtx op1, rtx op2, rtx op3,
- machine_mode data_mode,
- void (*gen_reflecting_code) (rtx *op))
+ machine_mode data_mode)
{
gcc_assert (!CONST_INT_P (op0));
gcc_assert (CONST_INT_P (op3));
machine_mode crc_mode = GET_MODE (op0);
-
rtx crc = gen_reg_rtx (crc_mode);
convert_move (crc, op1, 0);
- gen_reflecting_code (&crc);
-
- rtx data = gen_reg_rtx (data_mode);
- convert_move (data, op2, 0);
- gen_reflecting_code (&data);
-
- calculate_table_based_CRC (&crc, data, op3, data_mode);
-
- gen_reflecting_code (&crc);
+ calculate_table_based_reversed_CRC (&crc, op2, op3, data_mode);
convert_move (op0, crc, 0);
}
diff --git a/gcc/expr.h b/gcc/expr.h
index 060151df010..40627c4f113 100644
--- a/gcc/expr.h
+++ b/gcc/expr.h
@@ -385,8 +385,7 @@ gf2n_poly_long_div_quotient (unsigned HOST_WIDE_INT,
unsigned short);
/* Generate table-based CRC. */
extern void generate_reflecting_code_standard (rtx *);
extern void expand_crc_table_based (rtx, rtx, rtx, rtx, machine_mode);
-extern void expand_reversed_crc_table_based (rtx, rtx, rtx, rtx, machine_mode,
- void (*) (rtx *));
+extern void expand_reversed_crc_table_based (rtx, rtx, rtx, rtx, machine_mode);
/* Cache of the "extended" flag in the target's _BitInt description
for use during expand. */
diff --git a/gcc/internal-fn.cc b/gcc/internal-fn.cc
index 13fbd2ce788..acad06db8d7 100644
--- a/gcc/internal-fn.cc
+++ b/gcc/internal-fn.cc
@@ -4087,8 +4087,7 @@ expand_crc_optab_fn (internal_fn fn, gcall *stmt,
convert_optab optab)
else
/* If it's IFN_CRC_REV generate bit-reversed CRC. */
expand_reversed_crc_table_based (dest, crc, data, polynomial,
- TYPE_MODE (data_type),
- generate_reflecting_code_standard);
+ TYPE_MODE (data_type));
/* Now get the return value where it needs to be, taking care to
ensure it's promoted appropriately if the ABI demands it.
--
2.34.1