On Thu, Jun 30, 2022 at 4:50 PM H.J. Lu <hjl.to...@gmail.com> wrote: > > 1. Add a predicate for constant vectors which can be converted to integer > constants suitable for constant integer stores. For an 8-byte constant > vector, the converted 64-bit integer must be valid for a store with a 64-bit > immediate, which is a 64-bit integer sign-extended from a 32-bit integer. > 2. Add a new pattern to allow 2-byte, 4-byte and 8-byte constant vector > stores, like > > (set (mem:V2HI (reg:DI 84)) > (const_vector:V2HI [(const_int 0 [0]) (const_int 1 [0x1])])) > > 3. After reload, convert constant vector stores to constant integer > stores, like > > (set (mem:SI (reg:DI 5 di [84])) > (const_int 65536 [0x10000])) > > For > > void > foo (short * c) > { > c[0] = 0; > c[1] = 1; > } > > it generates > > movl $65536, (%rdi) > > instead of > > movl .LC0(%rip), %eax > movl %eax, (%rdi) > > gcc/ > > PR target/106022 > * config/i386/i386-protos.h (ix86_convert_const_vector_to_integer): > New. > * config/i386/i386.cc (ix86_convert_const_vector_to_integer): > New. > * config/i386/mmx.md (V_16_32_64): New. > (*mov<mode>_imm): New patterns for stores with 16-bit, 32-bit > and 64-bit constant vector. > * config/i386/predicates.md (x86_64_const_vector_operand): New. > > gcc/testsuite/ > > PR target/106022 > * gcc.target/i386/pr106022-1.c: New test. > * gcc.target/i386/pr106022-2.c: Likewise. > * gcc.target/i386/pr106022-3.c: Likewise. > * gcc.target/i386/pr106022-4.c: Likewise.
OK. Thanks, Uros. > --- > gcc/config/i386/i386-protos.h | 2 + > gcc/config/i386/i386.cc | 47 ++++++++++++++++++++++ > gcc/config/i386/mmx.md | 37 +++++++++++++++++ > gcc/config/i386/predicates.md | 11 +++++ > gcc/testsuite/gcc.target/i386/pr106022-1.c | 13 ++++++ > gcc/testsuite/gcc.target/i386/pr106022-2.c | 14 +++++++ > gcc/testsuite/gcc.target/i386/pr106022-3.c | 14 +++++++ > gcc/testsuite/gcc.target/i386/pr106022-4.c | 14 +++++++ > 8 files changed, 152 insertions(+) > create mode 100644 gcc/testsuite/gcc.target/i386/pr106022-1.c > create mode 100644 gcc/testsuite/gcc.target/i386/pr106022-2.c > create mode 100644 gcc/testsuite/gcc.target/i386/pr106022-3.c > create mode 100644 gcc/testsuite/gcc.target/i386/pr106022-4.c > > diff --git a/gcc/config/i386/i386-protos.h b/gcc/config/i386/i386-protos.h > index 3596ce81ecf..cf847751ac5 100644 > --- a/gcc/config/i386/i386-protos.h > +++ b/gcc/config/i386/i386-protos.h > @@ -122,6 +122,8 @@ extern void ix86_expand_unary_operator (enum rtx_code, > machine_mode, > rtx[]); > extern rtx ix86_build_const_vector (machine_mode, bool, rtx); > extern rtx ix86_build_signbit_mask (machine_mode, bool, bool); > +extern HOST_WIDE_INT ix86_convert_const_vector_to_integer (rtx, > + machine_mode); > extern void ix86_split_convert_uns_si_sse (rtx[]); > extern void ix86_expand_convert_uns_didf_sse (rtx, rtx); > extern void ix86_expand_convert_uns_sixf_sse (rtx, rtx); > diff --git a/gcc/config/i386/i386.cc b/gcc/config/i386/i386.cc > index b15b4893bb9..0cfe9962f75 100644 > --- a/gcc/config/i386/i386.cc > +++ b/gcc/config/i386/i386.cc > @@ -15723,6 +15723,53 @@ ix86_build_signbit_mask (machine_mode mode, bool > vect, bool invert) > return force_reg (vec_mode, v); > } > > +/* Return HOST_WIDE_INT for const vector OP in MODE. 
*/ > + > +HOST_WIDE_INT > +ix86_convert_const_vector_to_integer (rtx op, machine_mode mode) > +{ > + if (GET_MODE_SIZE (mode) > UNITS_PER_WORD) > + gcc_unreachable (); > + > + int nunits = GET_MODE_NUNITS (mode); > + wide_int val = wi::zero (GET_MODE_BITSIZE (mode)); > + machine_mode innermode = GET_MODE_INNER (mode); > + unsigned int innermode_bits = GET_MODE_BITSIZE (innermode); > + > + switch (mode) > + { > + case E_V2QImode: > + case E_V4QImode: > + case E_V2HImode: > + case E_V8QImode: > + case E_V4HImode: > + case E_V2SImode: > + for (int i = 0; i < nunits; ++i) > + { > + int v = INTVAL (XVECEXP (op, 0, i)); > + wide_int wv = wi::shwi (v, innermode_bits); > + val = wi::insert (val, wv, innermode_bits * i, innermode_bits); > + } > + break; > + case E_V2HFmode: > + case E_V4HFmode: > + case E_V2SFmode: > + for (int i = 0; i < nunits; ++i) > + { > + rtx x = XVECEXP (op, 0, i); > + int v = real_to_target (NULL, CONST_DOUBLE_REAL_VALUE (x), > + REAL_MODE_FORMAT (innermode)); > + wide_int wv = wi::shwi (v, innermode_bits); > + val = wi::insert (val, wv, innermode_bits * i, innermode_bits); > + } > + break; > + default: > + gcc_unreachable (); > + } > + > + return val.to_shwi (); > +} > + > /* Return TRUE or FALSE depending on whether the first SET in INSN > has source and destination with matching CC modes, and that the > CC mode is at least as constrained as REQ_MODE. 
*/ > diff --git a/gcc/config/i386/mmx.md b/gcc/config/i386/mmx.md > index ba53007a35e..3294c1e6274 100644 > --- a/gcc/config/i386/mmx.md > +++ b/gcc/config/i386/mmx.md > @@ -69,6 +69,12 @@ (define_mode_iterator VI_16_32 [V4QI V2QI V2HI]) > ;; 4-byte and 2-byte QImode vector modes > (define_mode_iterator VI1_16_32 [V4QI V2QI]) > > +;; All 2-byte, 4-byte and 8-byte vector modes with more than 1 element > +(define_mode_iterator V_16_32_64 > + [V2QI V4QI V2HI V2HF > + (V8QI "TARGET_64BIT") (V4HI "TARGET_64BIT") (V4HF "TARGET_64BIT") > + (V2SI "TARGET_64BIT") (V2SF "TARGET_64BIT")]) > + > ;; V2S* modes > (define_mode_iterator V2FI [V2SF V2SI]) > > @@ -331,6 +337,37 @@ (define_insn "*mov<mode>_internal" > ] > (symbol_ref "true")))]) > > +;; 16-bit, 32-bit and 64-bit constant vector stores. After reload, > +;; convert them to immediate integer stores. > +(define_insn_and_split "*mov<mode>_imm" > + [(set (match_operand:V_16_32_64 0 "memory_operand" "=m") > + (match_operand:V_16_32_64 1 "x86_64_const_vector_operand" "i"))] > + "" > + "#" > + "&& reload_completed" > + [(set (match_dup 0) (match_dup 1))] > +{ > + HOST_WIDE_INT val = ix86_convert_const_vector_to_integer (operands[1], > + <MODE>mode); > + operands[1] = GEN_INT (val); > + machine_mode mode; > + switch (GET_MODE_SIZE (<MODE>mode)) > + { > + case 2: > + mode = HImode; > + break; > + case 4: > + mode = SImode; > + break; > + case 8: > + mode = DImode; > + break; > + default: > + gcc_unreachable (); > + } > + operands[0] = lowpart_subreg (mode, operands[0], <MODE>mode); > +}) > + > ;; For TARGET_64BIT we always round up to 8 bytes. 
> (define_insn "*push<mode>2_rex64" > [(set (match_operand:V_32 0 "push_operand" "=X,X") > diff --git a/gcc/config/i386/predicates.md b/gcc/config/i386/predicates.md > index 128144f1050..c71c453cceb 100644 > --- a/gcc/config/i386/predicates.md > +++ b/gcc/config/i386/predicates.md > @@ -1194,6 +1194,17 @@ (define_predicate "reg_or_const_vector_operand" > (ior (match_operand 0 "register_operand") > (match_code "const_vector"))) > > +;; Return true when OP is CONST_VECTOR which can be converted to a > +;; sign extended 32-bit integer. > +(define_predicate "x86_64_const_vector_operand" > + (match_code "const_vector") > +{ > + if (GET_MODE_SIZE (mode) > UNITS_PER_WORD) > + return false; > + HOST_WIDE_INT val = ix86_convert_const_vector_to_integer (op, mode); > + return trunc_int_for_mode (val, SImode) == val; > +}) > + > ;; Return true when OP is nonimmediate or standard SSE constant. > (define_predicate "nonimmediate_or_sse_const_operand" > (ior (match_operand 0 "nonimmediate_operand") > diff --git a/gcc/testsuite/gcc.target/i386/pr106022-1.c > b/gcc/testsuite/gcc.target/i386/pr106022-1.c > new file mode 100644 > index 00000000000..6643b4c30f1 > --- /dev/null > +++ b/gcc/testsuite/gcc.target/i386/pr106022-1.c > @@ -0,0 +1,13 @@ > +/* { dg-do compile } */ > +/* { dg-options "-O2 -march=x86-64" } */ > + > +void > +foo (char *c) > +{ > + c[0] = 0; > + c[1] = 1; > + c[2] = 2; > + c[3] = 3; > +} > + > +/* { dg-final { scan-assembler-times "movl\[ \\t\]+\\\$50462976," 1 } } */ > diff --git a/gcc/testsuite/gcc.target/i386/pr106022-2.c > b/gcc/testsuite/gcc.target/i386/pr106022-2.c > new file mode 100644 > index 00000000000..0e79fb53297 > --- /dev/null > +++ b/gcc/testsuite/gcc.target/i386/pr106022-2.c > @@ -0,0 +1,14 @@ > +/* { dg-do compile } */ > +/* { dg-options "-O2 -march=x86-64" } */ > + > +void > +foo (int *c) > +{ > + c = __builtin_assume_aligned (c, 16); > + c[0] = -1; > + c[1] = -1; > +} > + > +/* { dg-final { scan-assembler-times "movq\[ \\t\]+\[^\n\]*%xmm" 2 { 
target > { ia32 } } } } */ > +/* { dg-final { scan-assembler-times "movq\[ \\t\]+\\\$-1," 1 { target { ! > ia32 } } } } */ > +/* { dg-final { scan-assembler-not "xmm" { target { ! ia32 } } } } */ > diff --git a/gcc/testsuite/gcc.target/i386/pr106022-3.c > b/gcc/testsuite/gcc.target/i386/pr106022-3.c > new file mode 100644 > index 00000000000..8b0c2a8f6d8 > --- /dev/null > +++ b/gcc/testsuite/gcc.target/i386/pr106022-3.c > @@ -0,0 +1,14 @@ > +/* { dg-do compile } */ > +/* { dg-options "-O2 -march=x86-64" } */ > + > +void > +foo (int *c) > +{ > + c[0] = 0; > + c[1] = 1; > + c[2] = 2; > + c[3] = 3; > +} > + > +/* { dg-final { scan-assembler-times "movdqa\[ \\t\]+\[^\n\]*%xmm" 1 } } */ > +/* { dg-final { scan-assembler-times "movups\[ \\t\]+\[^\n\]*%xmm" 1 } } */ > diff --git a/gcc/testsuite/gcc.target/i386/pr106022-4.c > b/gcc/testsuite/gcc.target/i386/pr106022-4.c > new file mode 100644 > index 00000000000..8ecda170af3 > --- /dev/null > +++ b/gcc/testsuite/gcc.target/i386/pr106022-4.c > @@ -0,0 +1,14 @@ > +/* { dg-do compile } */ > +/* { dg-options "-O2 -march=x86-64" } */ > + > +void > +foo (float *c) > +{ > + c[0] = 2.3; > + c[1] = 0.0; > +} > + > +/* { dg-final { scan-assembler-times "movl\[ \\t\]+\\\$0x40133333" 1 { > target { ia32 } } } } */ > +/* { dg-final { scan-assembler-times "movl\[ \\t\]+\\\$0x00000000" 1 { > target { ia32 } } } } */ > +/* { dg-final { scan-assembler-times "movq\[ \\t\]+\\\$1075000115," 1 { > target { ! ia32 } } } } */ > +/* { dg-final { scan-assembler-not "xmm" } } */ > -- > 2.36.1 >