8 byte constant vector stores

Uros Bizjak via Gcc-patches Fri, 01 Jul 2022 08:32:03 -0700

On Thu, Jun 30, 2022 at 4:50 PM H.J. Lu <hjl.to...@gmail.com> wrote:
>
> 1. Add a predicate for constant vectors which can be converted to integer
> constants suitable for constant integer stores.  For an 8-byte constant
> vector, the converted 64-bit integer must be valid for store with 64-bit
> immediate, which is a 64-bit integer sign-extended from a 32-bit integer.
> 2. Add a new pattern to allow 2-byte, 4-byte and 8-byte constant vector
> stores, like
>
> (set (mem:V2HI (reg:DI 84))
>      (const_vector:V2HI [(const_int 0 [0]) (const_int 1 [0x1])]))
>
> 3. After reload, convert constant vector stores to constant integer
> stores, like
>
> (set (mem:SI (reg:DI 5 di [84]))
>      (const_int 65536 [0x10000]))
>
> For
>
> void
> foo (short * c)
> {
>   c[0] = 0;
>   c[1] = 1;
> }
>
> it generates
>
>         movl    $65536, (%rdi)
>
> instead of
>
>         movl    .LC0(%rip), %eax
>         movl    %eax, (%rdi)
>
> gcc/
>
>         PR target/106022
>         * config/i386/i386-protos.h (ix86_convert_const_vector_to_integer):
>         New.
>         * config/i386/i386.cc (ix86_convert_const_vector_to_integer):
>         New.
>         * config/i386/mmx.md (V_16_32_64): New.
>         (*mov<mode>_imm): New patterns for stores with 16-bit, 32-bit
>         and 64-bit constant vector.
>         * config/i386/predicates.md (x86_64_const_vector_operand): New.
>
> gcc/testsuite/
>
>         PR target/106022
>         * gcc.target/i386/pr106022-1.c: New test.
>         * gcc.target/i386/pr106022-2.c: Likewise.
>         * gcc.target/i386/pr106022-3.c: Likewise.
>         * gcc.target/i386/pr106022-4.c: Likewise.


OK.

Thanks,
Uros.

> ---
>  gcc/config/i386/i386-protos.h              |  2 +
>  gcc/config/i386/i386.cc                    | 47 ++++++++++++++++++++++
>  gcc/config/i386/mmx.md                     | 37 +++++++++++++++++
>  gcc/config/i386/predicates.md              | 11 +++++
>  gcc/testsuite/gcc.target/i386/pr106022-1.c | 13 ++++++
>  gcc/testsuite/gcc.target/i386/pr106022-2.c | 14 +++++++
>  gcc/testsuite/gcc.target/i386/pr106022-3.c | 14 +++++++
>  gcc/testsuite/gcc.target/i386/pr106022-4.c | 14 +++++++
>  8 files changed, 152 insertions(+)
>  create mode 100644 gcc/testsuite/gcc.target/i386/pr106022-1.c
>  create mode 100644 gcc/testsuite/gcc.target/i386/pr106022-2.c
>  create mode 100644 gcc/testsuite/gcc.target/i386/pr106022-3.c
>  create mode 100644 gcc/testsuite/gcc.target/i386/pr106022-4.c
>
> diff --git a/gcc/config/i386/i386-protos.h b/gcc/config/i386/i386-protos.h
> index 3596ce81ecf..cf847751ac5 100644
> --- a/gcc/config/i386/i386-protos.h
> +++ b/gcc/config/i386/i386-protos.h
> @@ -122,6 +122,8 @@ extern void ix86_expand_unary_operator (enum rtx_code, machine_mode,
>                                         rtx[]);
>  extern rtx ix86_build_const_vector (machine_mode, bool, rtx);
>  extern rtx ix86_build_signbit_mask (machine_mode, bool, bool);
> +extern HOST_WIDE_INT ix86_convert_const_vector_to_integer (rtx,
> +                                                          machine_mode);
>  extern void ix86_split_convert_uns_si_sse (rtx[]);
>  extern void ix86_expand_convert_uns_didf_sse (rtx, rtx);
>  extern void ix86_expand_convert_uns_sixf_sse (rtx, rtx);
> diff --git a/gcc/config/i386/i386.cc b/gcc/config/i386/i386.cc
> index b15b4893bb9..0cfe9962f75 100644
> --- a/gcc/config/i386/i386.cc
> +++ b/gcc/config/i386/i386.cc
> @@ -15723,6 +15723,53 @@ ix86_build_signbit_mask (machine_mode mode, bool vect, bool invert)
>    return force_reg (vec_mode, v);
>  }
>
> +/* Return HOST_WIDE_INT for const vector OP in MODE.  */
> +
> +HOST_WIDE_INT
> +ix86_convert_const_vector_to_integer (rtx op, machine_mode mode)
> +{
> +  if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
> +    gcc_unreachable ();
> +
> +  int nunits = GET_MODE_NUNITS (mode);
> +  wide_int val = wi::zero (GET_MODE_BITSIZE (mode));
> +  machine_mode innermode = GET_MODE_INNER (mode);
> +  unsigned int innermode_bits = GET_MODE_BITSIZE (innermode);
> +
> +  switch (mode)
> +    {
> +    case E_V2QImode:
> +    case E_V4QImode:
> +    case E_V2HImode:
> +    case E_V8QImode:
> +    case E_V4HImode:
> +    case E_V2SImode:
> +      for (int i = 0; i < nunits; ++i)
> +       {
> +         int v = INTVAL (XVECEXP (op, 0, i));
> +         wide_int wv = wi::shwi (v, innermode_bits);
> +         val = wi::insert (val, wv, innermode_bits * i, innermode_bits);
> +       }
> +      break;
> +    case E_V2HFmode:
> +    case E_V4HFmode:
> +    case E_V2SFmode:
> +      for (int i = 0; i < nunits; ++i)
> +       {
> +         rtx x = XVECEXP (op, 0, i);
> +         int v = real_to_target (NULL, CONST_DOUBLE_REAL_VALUE (x),
> +                                 REAL_MODE_FORMAT (innermode));
> +         wide_int wv = wi::shwi (v, innermode_bits);
> +         val = wi::insert (val, wv, innermode_bits * i, innermode_bits);
> +       }
> +      break;
> +    default:
> +      gcc_unreachable ();
> +    }
> +
> +  return val.to_shwi ();
> +}
> +
>  /* Return TRUE or FALSE depending on whether the first SET in INSN
>     has source and destination with matching CC modes, and that the
>     CC mode is at least as constrained as REQ_MODE.  */
> diff --git a/gcc/config/i386/mmx.md b/gcc/config/i386/mmx.md
> index ba53007a35e..3294c1e6274 100644
> --- a/gcc/config/i386/mmx.md
> +++ b/gcc/config/i386/mmx.md
> @@ -69,6 +69,12 @@ (define_mode_iterator VI_16_32 [V4QI V2QI V2HI])
>  ;; 4-byte and 2-byte QImode vector modes
>  (define_mode_iterator VI1_16_32 [V4QI V2QI])
>
> +;; All 2-byte, 4-byte and 8-byte vector modes with more than 1 element
> +(define_mode_iterator V_16_32_64
> +   [V2QI V4QI V2HI V2HF
> +    (V8QI "TARGET_64BIT") (V4HI "TARGET_64BIT") (V4HF "TARGET_64BIT")
> +    (V2SI "TARGET_64BIT") (V2SF "TARGET_64BIT")])
> +
>  ;; V2S* modes
>  (define_mode_iterator V2FI [V2SF V2SI])
>
> @@ -331,6 +337,37 @@ (define_insn "*mov<mode>_internal"
>            ]
>            (symbol_ref "true")))])
>
> +;; 16-bit, 32-bit and 64-bit constant vector stores.  After reload,
> +;; convert them to immediate integer stores.
> +(define_insn_and_split "*mov<mode>_imm"
> +  [(set (match_operand:V_16_32_64 0 "memory_operand" "=m")
> +       (match_operand:V_16_32_64 1 "x86_64_const_vector_operand" "i"))]
> +  ""
> +  "#"
> +  "&& reload_completed"
> +  [(set (match_dup 0) (match_dup 1))]
> +{
> +  HOST_WIDE_INT val = ix86_convert_const_vector_to_integer (operands[1],
> +                                                           <MODE>mode);
> +  operands[1] = GEN_INT (val);
> +  machine_mode mode;
> +  switch (GET_MODE_SIZE (<MODE>mode))
> +    {
> +    case 2:
> +      mode = HImode;
> +      break;
> +    case 4:
> +      mode = SImode;
> +      break;
> +    case 8:
> +      mode = DImode;
> +      break;
> +    default:
> +      gcc_unreachable ();
> +    }
> +  operands[0] = lowpart_subreg (mode, operands[0], <MODE>mode);
> +})
> +
>  ;; For TARGET_64BIT we always round up to 8 bytes.
>  (define_insn "*push<mode>2_rex64"
>    [(set (match_operand:V_32 0 "push_operand" "=X,X")
> diff --git a/gcc/config/i386/predicates.md b/gcc/config/i386/predicates.md
> index 128144f1050..c71c453cceb 100644
> --- a/gcc/config/i386/predicates.md
> +++ b/gcc/config/i386/predicates.md
> @@ -1194,6 +1194,17 @@ (define_predicate "reg_or_const_vector_operand"
>    (ior (match_operand 0 "register_operand")
>         (match_code "const_vector")))
>
> +;; Return true when OP is CONST_VECTOR which can be converted to a
> +;; sign extended 32-bit integer.
> +(define_predicate "x86_64_const_vector_operand"
> +  (match_code "const_vector")
> +{
> +  if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
> +    return false;
> +  HOST_WIDE_INT val = ix86_convert_const_vector_to_integer (op, mode);
> +  return trunc_int_for_mode (val, SImode) == val;
> +})
> +
>  ;; Return true when OP is nonimmediate or standard SSE constant.
>  (define_predicate "nonimmediate_or_sse_const_operand"
>    (ior (match_operand 0 "nonimmediate_operand")
> diff --git a/gcc/testsuite/gcc.target/i386/pr106022-1.c b/gcc/testsuite/gcc.target/i386/pr106022-1.c
> new file mode 100644
> index 00000000000..6643b4c30f1
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/i386/pr106022-1.c
> @@ -0,0 +1,13 @@
> +/* { dg-do compile } */
> +/* { dg-options "-O2 -march=x86-64" } */
> +
> +void
> +foo (char *c)
> +{
> +  c[0] = 0;
> +  c[1] = 1;
> +  c[2] = 2;
> +  c[3] = 3;
> +}
> +
> +/* { dg-final { scan-assembler-times "movl\[ \\t\]+\\\$50462976," 1 } } */
> diff --git a/gcc/testsuite/gcc.target/i386/pr106022-2.c b/gcc/testsuite/gcc.target/i386/pr106022-2.c
> new file mode 100644
> index 00000000000..0e79fb53297
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/i386/pr106022-2.c
> @@ -0,0 +1,14 @@
> +/* { dg-do compile } */
> +/* { dg-options "-O2 -march=x86-64" } */
> +
> +void
> +foo (int *c)
> +{
> +  c = __builtin_assume_aligned (c, 16);
> +  c[0] = -1;
> +  c[1] = -1;
> +}
> +
> +/* { dg-final { scan-assembler-times "movq\[ \\t\]+\[^\n\]*%xmm" 2 { target { ia32 } } } } */
> +/* { dg-final { scan-assembler-times "movq\[ \\t\]+\\\$-1," 1 { target { ! ia32 } } } } */
> +/* { dg-final { scan-assembler-not "xmm" { target { ! ia32 } } } } */
> diff --git a/gcc/testsuite/gcc.target/i386/pr106022-3.c b/gcc/testsuite/gcc.target/i386/pr106022-3.c
> new file mode 100644
> index 00000000000..8b0c2a8f6d8
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/i386/pr106022-3.c
> @@ -0,0 +1,14 @@
> +/* { dg-do compile } */
> +/* { dg-options "-O2 -march=x86-64" } */
> +
> +void
> +foo (int *c)
> +{
> +  c[0] = 0;
> +  c[1] = 1;
> +  c[2] = 2;
> +  c[3] = 3;
> +}
> +
> +/* { dg-final { scan-assembler-times "movdqa\[ \\t\]+\[^\n\]*%xmm" 1 } } */
> +/* { dg-final { scan-assembler-times "movups\[ \\t\]+\[^\n\]*%xmm" 1 } } */
> diff --git a/gcc/testsuite/gcc.target/i386/pr106022-4.c b/gcc/testsuite/gcc.target/i386/pr106022-4.c
> new file mode 100644
> index 00000000000..8ecda170af3
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/i386/pr106022-4.c
> @@ -0,0 +1,14 @@
> +/* { dg-do compile } */
> +/* { dg-options "-O2 -march=x86-64" } */
> +
> +void
> +foo (float *c)
> +{
> +  c[0] = 2.3;
> +  c[1] = 0.0;
> +}
> +
> +/* { dg-final { scan-assembler-times "movl\[ \\t\]+\\\$0x40133333" 1 { target { ia32 } } } } */
> +/* { dg-final { scan-assembler-times "movl\[ \\t\]+\\\$0x00000000" 1 { target { ia32 } } } } */
> +/* { dg-final { scan-assembler-times "movq\[ \\t\]+\\\$1075000115," 1 { target { ! ia32 } } } } */
> +/* { dg-final { scan-assembler-not "xmm" } } */
> --
> 2.36.1
>

Re: [PATCH] x86: Support 2/4/8 byte constant vector stores

Reply via email to