Re: [PATCH 27/42] i386: Make _mm_empty () as NOP when MMX is disabled

2019-02-16 Thread Uros Bizjak
On 2/16/19, H.J. Lu  wrote:
> With SSE emulation of MMX intrinsics, we should make _mm_empty () as NOP
> when MMX is disabled.
>
>   PR target/89021
>   * config/i386/mmx.md (mmx_): Renamed to ...
>   (mmx__1): This.
>   (mmx_): New expander.
> ---
>  gcc/config/i386/mmx.md | 29 -
>  1 file changed, 28 insertions(+), 1 deletion(-)
>
> diff --git a/gcc/config/i386/mmx.md b/gcc/config/i386/mmx.md
> index 9cf0251293a..0f925c0b1ea 100644
> --- a/gcc/config/i386/mmx.md
> +++ b/gcc/config/i386/mmx.md
> @@ -1848,7 +1848,34 @@
>[(UNSPECV_EMMS "emms")
> (UNSPECV_FEMMS "femms")])
>
> -(define_insn "mmx_"
> +(define_expand "mmx_"
> +  [(unspec_volatile [(const_int 0)] EMMS)
> +   (clobber (reg:XF ST0_REG))
> +   (clobber (reg:XF ST1_REG))
> +   (clobber (reg:XF ST2_REG))
> +   (clobber (reg:XF ST3_REG))
> +   (clobber (reg:XF ST4_REG))
> +   (clobber (reg:XF ST5_REG))
> +   (clobber (reg:XF ST6_REG))
> +   (clobber (reg:XF ST7_REG))
> +   (clobber (reg:DI MM0_REG))
> +   (clobber (reg:DI MM1_REG))
> +   (clobber (reg:DI MM2_REG))
> +   (clobber (reg:DI MM3_REG))
> +   (clobber (reg:DI MM4_REG))
> +   (clobber (reg:DI MM5_REG))
> +   (clobber (reg:DI MM6_REG))
> +   (clobber (reg:DI MM7_REG))]
> +  "TARGET_MMX || TARGET_MMX_WITH_SSE"
> +{
> +   if (TARGET_MMX)
> + emit_insn (gen_mmx__1 ());
> +   else
> + emit_insn (gen_nop ());
> +   DONE;

The above should be written as:

if (!TARGET_MMX)
  {
emit_insn (gen_nop ());
DONE;
  }

> +})
> +
> +(define_insn "mmx__1"

The old insn should be renamed to "*mmx_<emms>".

Uros.

>[(unspec_volatile [(const_int 0)] EMMS)
> (clobber (reg:XF ST0_REG))
> (clobber (reg:XF ST1_REG))
> --
> 2.20.1
>
>


Re: [PATCH 17/42] i386: Emulate MMX mmx_pinsrw with SSE

2019-02-16 Thread Uros Bizjak
On 2/16/19, H.J. Lu  wrote:
> Emulate MMX mmx_pinsrw with SSE.  Only SSE register source operand is
> allowed.

Here we allow general register and memory operands in both cases, so
the above sentence is misleading.

Uros.

>
>   PR target/89021
>   * config/i386/mmx.md (mmx_pinsrw): Also check TARGET_MMX and
>   TARGET_MMX_WITH_SSE.
>   (*mmx_pinsrw): Add SSE emulation.
> ---
>  gcc/config/i386/mmx.md | 33 +++--
>  1 file changed, 23 insertions(+), 10 deletions(-)
>
> diff --git a/gcc/config/i386/mmx.md b/gcc/config/i386/mmx.md
> index 22547c7da6f..1e68d1bb338 100644
> --- a/gcc/config/i386/mmx.md
> +++ b/gcc/config/i386/mmx.md
> @@ -1282,32 +1282,45 @@
>  (match_operand:SI 2 "nonimmediate_operand"))
> (match_operand:V4HI 1 "register_operand")
>(match_operand:SI 3 "const_0_to_3_operand")))]
> -  "TARGET_SSE || TARGET_3DNOW_A"
> +  "(TARGET_MMX || TARGET_MMX_WITH_SSE)
> +   && (TARGET_SSE || TARGET_3DNOW_A)"
>  {
>operands[2] = gen_lowpart (HImode, operands[2]);
>operands[3] = GEN_INT (1 << INTVAL (operands[3]));
>  })
>
>  (define_insn "*mmx_pinsrw"
> -  [(set (match_operand:V4HI 0 "register_operand" "=y")
> +  [(set (match_operand:V4HI 0 "register_operand" "=y,x,Yv")
>  (vec_merge:V4HI
>(vec_duplicate:V4HI
> -(match_operand:HI 2 "nonimmediate_operand" "rm"))
> -   (match_operand:V4HI 1 "register_operand" "0")
> +(match_operand:HI 2 "nonimmediate_operand" "rm,rm,rm"))
> +   (match_operand:V4HI 1 "register_operand" "0,0,Yv")
>(match_operand:SI 3 "const_int_operand")))]
> -  "(TARGET_SSE || TARGET_3DNOW_A)
> +  "(TARGET_MMX || TARGET_MMX_WITH_SSE)
> +   && (TARGET_SSE || TARGET_3DNOW_A)
> && ((unsigned) exact_log2 (INTVAL (operands[3]))
> < GET_MODE_NUNITS (V4HImode))"
>  {
>operands[3] = GEN_INT (exact_log2 (INTVAL (operands[3])));
> -  if (MEM_P (operands[2]))
> -return "pinsrw\t{%3, %2, %0|%0, %2, %3}";
> +  if (TARGET_MMX_WITH_SSE && TARGET_AVX)
> +{
> +  if (MEM_P (operands[2]))
> + return "vpinsrw\t{%3, %2, %1, %0|%0, %1, %2, %3}";
> +  else
> + return "vpinsrw\t{%3, %k2, %1, %0|%0, %1, %k2, %3}";
> +}
>else
> -return "pinsrw\t{%3, %k2, %0|%0, %k2, %3}";
> +{
> +  if (MEM_P (operands[2]))
> + return "pinsrw\t{%3, %2, %0|%0, %2, %3}";
> +  else
> + return "pinsrw\t{%3, %k2, %0|%0, %k2, %3}";
> +}
>  }
> -  [(set_attr "type" "mmxcvt")
> +  [(set_attr "mmx_isa" "native,x64_noavx,x64_avx")
> +   (set_attr "type" "mmxcvt,sselog,sselog")
> (set_attr "length_immediate" "1")
> -   (set_attr "mode" "DI")])
> +   (set_attr "mode" "DI,TI,TI")])
>
>  (define_insn "mmx_pextrw"
>[(set (match_operand:SI 0 "register_operand" "=r,r")
> --
> 2.20.1
>
>


Re: [PATCH 36/42] i386: Correct _pmulhrsw3[_mask]

2019-02-16 Thread Uros Bizjak
On 2/16/19, H.J. Lu  wrote:
> There is no V4HI pmulhrsw in AVX512BW and V4HI/V8HI pmulhrsw don't require
> AVX2.  To support TARGET_MMX_WITH_SSE, replace nonimmediate_operand with
> register_pmulhrswmem_operand in _pmulhrsw3.
>
>   PR target/89372
>   * config/i386/predicates.md (register_pmulhrswmem_operand): New.
>   * config/i386/sse.md (PMULHRSW): Remove V4HI.
>   (PMULHRSW_MMX): New.
>   (_pmulhrsw3): Replace PMULHRSW with
>   PMULHRSW_MMX.  Require TARGET_SSSE3, not TARGET_AVX2.  Replace
>   nonimmediate_operand with register_pmulhrswmem_operand.

Complications like the above usually point to a wrong macroization
choice. Please try to split out the V4HImode pattern.

Also, please split the fix out into a separate patch, which should be
committed independently as a fix before gcc-9 is released.

Uros.

> ---
>  gcc/config/i386/predicates.md |  7 +++
>  gcc/config/i386/sse.md| 15 +--
>  2 files changed, 16 insertions(+), 6 deletions(-)
>
> diff --git a/gcc/config/i386/predicates.md b/gcc/config/i386/predicates.md
> index f3c2f72de54..b7cb26a81fe 100644
> --- a/gcc/config/i386/predicates.md
> +++ b/gcc/config/i386/predicates.md
> @@ -56,6 +56,13 @@
> (and (not (match_test "TARGET_MMX_WITH_SSE"))
>   (match_operand 0 "memory_operand"
>
> +;; Match register operands, but include memory operands for
> +;; !(TARGET_MMX_WITH_SSE && mode == V4HImode).
> +(define_predicate "register_pmulhrswmem_operand"
> +  (ior (match_operand 0 "register_operand")
> +   (and (not (match_test "TARGET_MMX_WITH_SSE && mode == V4HImode"))
> + (match_operand 0 "memory_operand"
> +
>  ;; True if the operand is an SSE register.
>  (define_predicate "sse_reg_operand"
>(and (match_code "reg")
> diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md
> index 92f5ad17156..379da16615d 100644
> --- a/gcc/config/i386/sse.md
> +++ b/gcc/config/i386/sse.md
> @@ -15579,7 +15579,7 @@
> (set_attr "mode" "DI,TI,TI")])
>
>  (define_mode_iterator PMULHRSW
> -  [V4HI V8HI (V16HI "TARGET_AVX2")])
> +  [V8HI (V16HI "TARGET_AVX2")])
>
>  (define_expand "_pmulhrsw3_mask"
>[(set (match_operand:PMULHRSW 0 "register_operand")
> @@ -15604,21 +15604,24 @@
>ix86_fixup_binary_operands_no_copy (MULT, mode, operands);
>  })
>
> +(define_mode_iterator PMULHRSW_MMX
> +  [V4HI V8HI (V16HI "TARGET_AVX2")])
> +
>  (define_expand "_pmulhrsw3"
> -  [(set (match_operand:PMULHRSW 0 "register_operand")
> - (truncate:PMULHRSW
> +  [(set (match_operand:PMULHRSW_MMX 0 "register_operand")
> + (truncate:PMULHRSW_MMX
> (lshiftrt:
>   (plus:
> (lshiftrt:
>   (mult:
> (sign_extend:
> - (match_operand:PMULHRSW 1 "nonimmediate_operand"))
> + (match_operand:PMULHRSW_MMX 1 
> "register_pmulhrswmem_operand"))
> (sign_extend:
> - (match_operand:PMULHRSW 2 "nonimmediate_operand")))
> + (match_operand:PMULHRSW_MMX 2 
> "register_pmulhrswmem_operand")))
>   (const_int 14))
> (match_dup 3))
>   (const_int 1]
> -  "TARGET_AVX2"
> +  "TARGET_SSSE3"
>  {
>operands[3] = CONST1_RTX(mode);
>ix86_fixup_binary_operands_no_copy (MULT, mode, operands);
> --
> 2.20.1
>
>


Re: [PATCH 39/42] i386: Allow MMX vector expanders with TARGET_MMX_WITH_SSE

2019-02-16 Thread Uros Bizjak
On 2/16/19, H.J. Lu  wrote:
>   PR target/89021
>   * config/i386/i386.c (ix86_expand_vector_init_duplicate): Set
>   mmx_ok to true if TARGET_MMX_WITH_SSE is true.
>   (ix86_expand_vector_init_one_nonzero): Likewise.
>   (ix86_expand_vector_init_one_var): Likewise.
>   (ix86_expand_vector_init_general): Likewise.
>   (ix86_expand_vector_init): Likewise.
>   (ix86_expand_vector_set): Likewise.
>   (ix86_expand_vector_extract): Likewise.
>   * config/i386/mmx.md (*vec_dupv2sf): Changed to
>   define_insn_and_split to support SSE emulation.
>   (*vec_extractv2sf_0): Likewise.
>   (*vec_extractv2sf_1): Likewise.
>   (*vec_extractv2si_0): Likewise.
>   (*vec_extractv2si_1): Likewise.
>   (*vec_extractv2si_zext_mem): Likewise.
>   (vec_setv2sf): Also allow TARGET_MMX_WITH_SSE.
>   (vec_extractv2sf_1 splitter): Likewise.
>   (vec_extractv2sfsf): Likewise.
>   (vec_setv2si): Likewise.
>   (vec_extractv2si_1 splitter): Likewise.
>   (vec_extractv2sisi): Likewise.
>   (vec_setv4hi): Likewise.
>   (vec_extractv4hihi): Likewise.
>   (vec_setv8qi): Likewise.
>   (vec_extractv8qiqi): Likewise.
> ---
>  gcc/config/i386/i386.c |  8 +
>  gcc/config/i386/mmx.md | 69 +++---
>  2 files changed, 52 insertions(+), 25 deletions(-)
>
> diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c
> index a76c17beece..25e0dc43a9e 100644
> --- a/gcc/config/i386/i386.c
> +++ b/gcc/config/i386/i386.c
> @@ -42620,6 +42620,7 @@ ix86_expand_vector_init_duplicate (bool mmx_ok,
> machine_mode mode,
>  {
>bool ok;
>
> +  mmx_ok |= TARGET_MMX_WITH_SSE;
>switch (mode)
>  {
>  case E_V2SImode:
> @@ -42779,6 +42780,7 @@ ix86_expand_vector_init_one_nonzero (bool mmx_ok,
> machine_mode mode,
>bool use_vector_set = false;
>rtx (*gen_vec_set_0) (rtx, rtx, rtx) = NULL;
>
> +  mmx_ok |= TARGET_MMX_WITH_SSE;
>switch (mode)
>  {
>  case E_V2DImode:
> @@ -42972,6 +42974,7 @@ ix86_expand_vector_init_one_var (bool mmx_ok,
> machine_mode mode,
>XVECEXP (const_vec, 0, one_var) = CONST0_RTX (GET_MODE_INNER (mode));
>const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (const_vec, 0));
>
> +  mmx_ok |= TARGET_MMX_WITH_SSE;
>switch (mode)
>  {
>  case E_V2DFmode:
> @@ -43357,6 +43360,7 @@ ix86_expand_vector_init_general (bool mmx_ok,
> machine_mode mode,
>machine_mode quarter_mode = VOIDmode;
>int n, i;
>
> +  mmx_ok |= TARGET_MMX_WITH_SSE;
>switch (mode)
>  {
>  case E_V2SFmode:
> @@ -43556,6 +43560,8 @@ ix86_expand_vector_init (bool mmx_ok, rtx target,
> rtx vals)
>int i;
>rtx x;
>
> +  mmx_ok |= TARGET_MMX_WITH_SSE;
> +
>/* Handle first initialization from vector elts.  */
>if (n_elts != XVECLEN (vals, 0))
>  {
> @@ -43655,6 +43661,7 @@ ix86_expand_vector_set (bool mmx_ok, rtx target, rtx
> val, int elt)
>machine_mode mmode = VOIDmode;
>rtx (*gen_blendm) (rtx, rtx, rtx, rtx);
>
> +  mmx_ok |= TARGET_MMX_WITH_SSE;
>switch (mode)
>  {
>  case E_V2SFmode:
> @@ -44010,6 +44017,7 @@ ix86_expand_vector_extract (bool mmx_ok, rtx target,
> rtx vec, int elt)
>bool use_vec_extr = false;
>rtx tmp;
>
> +  mmx_ok |= TARGET_MMX_WITH_SSE;
>switch (mode)
>  {
>  case E_V2SImode:
> diff --git a/gcc/config/i386/mmx.md b/gcc/config/i386/mmx.md
> index a21e11c8dfb..fa0b0126e91 100644
> --- a/gcc/config/i386/mmx.md
> +++ b/gcc/config/i386/mmx.md
> @@ -555,14 +555,23 @@
> (set_attr "prefix_extra" "1")
> (set_attr "mode" "V2SF")])
>
> -(define_insn "*vec_dupv2sf"
> -  [(set (match_operand:V2SF 0 "register_operand" "=y")
> +(define_insn_and_split "*vec_dupv2sf"
> +  [(set (match_operand:V2SF 0 "register_operand" "=y,x,Yv")
>   (vec_duplicate:V2SF
> -   (match_operand:SF 1 "register_operand" "0")))]
> -  "TARGET_MMX"
> -  "punpckldq\t%0, %0"
> -  [(set_attr "type" "mmxcvt")
> -   (set_attr "mode" "DI")])
> +   (match_operand:SF 1 "register_operand" "0,0,Yv")))]
> +  "TARGET_MMX || TARGET_MMX_WITH_SSE"
> +  "@
> +   punpckldq\t%0, %0
> +   #
> +   #"
> +  "TARGET_MMX_WITH_SSE && reload_completed"
> +  [(set (match_dup 0)
> + (vec_duplicate:V4SF (match_dup 1)))]
> +  "operands[0] = lowpart_subreg (V4SFmode, operands[0],
> +  GET_MODE (operands[0]));"
> +  [(set_attr "mmx_isa" "native,x64_noavx,x64_avx")
> +   (set_attr "type" "mmxcvt,ssemov,ssemov")
> +   (set_attr "mode" "DI,TI,TI")])
>
>  (define_insn "*mmx_concatv2sf"
>[(set (match_operand:V2SF 0 "register_operand" "=y,y")
> @@ -580,7 +589,7 @@
>[(match_operand:V2SF 0 "register_operand")
> (match_operand:SF 1 "register_operand")
> (match_operand 2 "const_int_operand")]
> -  "TARGET_MMX"
> +  "TARGET_MMX || TARGET_MMX_WITH_SSE"
>  {
>ix86_expand_vector_set (false, operands[0], operands[1],
> INTVAL (operands[2]));
> @@ -594,11 +603,13 @@
>   (vec_select:SF
>

Re: [PATCH] Decrease {i386,sse}.md global state by 12KB

2019-02-16 Thread Uros Bizjak
On 2/16/19, Jakub Jelinek  wrote:
> Hi!
>
> This is something I've noticed in a s390 change I'll post soon (where it
> was
> even completely unnecessary), but it applies to i386 backend too.
> Seems we have lots of .bss global state, 66x 64-byte and 61x 128-byte long
> static buffers.  Instead of doing
>   static char buf[128];
>   ...
>   s{,n}printf (buf, ...);
>   ...
>   return buf;
> in the insn templates we can do:
>   char buf[128];
>   ...
>   s{,n}printf (buf, ...);
>   ...
>   output_asm_insn (buf, operands);
>   return "";
> and avoid that way the global state.  The only problem with that is
> that final.c does something in between:
> 1) if return from the template is NULL, not this case
> 2) if return from the template is "#", not this case
> 3) if (targetm.asm_out.unwind_emit_before_insn
> && targetm.asm_out.unwind_emit)
>   targetm.asm_out.unwind_emit (asm_out_file, insn);
>while cygming.h has
> #define TARGET_ASM_UNWIND_EMIT  i386_pe_seh_unwind_emit
> #define TARGET_ASM_UNWIND_EMIT_BEFORE_INSN  false
>it is ok too (and other i386 subtargets don't do either,
>so unwind_emit_before_insn is true (the default) and unwind_emit
>NULL
> 4) rtx_call_insn *call_insn = dyn_cast <rtx_call_insn *> (insn);
> if (call_insn != NULL)
>that is for calls only, the patch doesn't change any calls
> Those 4 spots are in between get_insn_template and
> output_asm_insn (templ, recog_data.operand);
> which starts with:
>   /* An insn may return a null string template
>  in a case where no assembler code is needed.  */
>   if (*templ == 0)
> return;
> so I think the patch doesn't make it more costly, there is just
> one output_asm_insn extra call and the old one will return immediately.
>
> Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk?
>
> 2019-02-16  Jakub Jelinek  
>
>   * config/i386/i386.md (*movqi_internal): Remove static from
>   buf variable.  Use output_asm_insn (buf, operands); return "";
>   instead of return buf;.
>   * config/i386/sse.md (_andnot3,
>   *3, *andnot3, *andnottf3, *3,
>   *tf3, 3): Likewise.

Looks like cargo cult programming to me (some of the copies are even mine ;) .

OK.

Thanks,
Uros.

>
> --- gcc/config/i386/i386.md.jj2019-02-12 21:48:53.183072497 +0100
> +++ gcc/config/i386/i386.md   2019-02-15 23:25:36.198589133 +0100
> @@ -2531,7 +2531,7 @@ (define_insn "*movqi_internal"
>   "Q ,R,r,n,m,q,rn, m,qn,r,k,k,k,m,C,BC"))]
>"!(MEM_P (operands[0]) && MEM_P (operands[1]))"
>  {
> -  static char buf[128];
> +  char buf[128];
>const char *ops;
>const char *suffix;
>
> @@ -2564,7 +2564,8 @@ (define_insn "*movqi_internal"
>suffix = (get_attr_mode (insn) == MODE_HI) ? "w" : "b";
>
>snprintf (buf, sizeof (buf), ops, suffix);
> -  return buf;
> +  output_asm_insn (buf, operands);
> +  return "";
>
>  case TYPE_MSKLOG:
>if (operands[1] == const0_rtx)
> --- gcc/config/i386/sse.md.jj 2019-02-14 08:06:39.446519415 +0100
> +++ gcc/config/i386/sse.md2019-02-15 23:28:54.305366640 +0100
> @@ -3198,7 +3198,7 @@ (define_insn "_andnot3 (match_operand:VF_128_256 2 "vector_operand" "xBm,xm,vm,vm")))]
>"TARGET_SSE && "
>  {
> -  static char buf[128];
> +  char buf[128];
>const char *ops;
>const char *suffix;
>
> @@ -3233,7 +3233,8 @@ (define_insn "_andnot3  }
>
>snprintf (buf, sizeof (buf), ops, suffix);
> -  return buf;
> +  output_asm_insn (buf, operands);
> +  return "";
>  }
>[(set_attr "isa" "noavx,avx,avx512dq,avx512f")
> (set_attr "type" "sselog")
> @@ -3264,7 +3265,7 @@ (define_insn "_andnot3 (match_operand:VF_512 2 "nonimmediate_operand" "vm")))]
>"TARGET_AVX512F"
>  {
> -  static char buf[128];
> +  char buf[128];
>const char *ops;
>const char *suffix;
>
> @@ -3281,7 +3282,8 @@ (define_insn "_andnot3snprintf (buf, sizeof (buf),
>   "v%sandn%s\t{%%2, %%1, %%0|%%0, 
> %%1,
> %%2}",
>   ops, suffix);
> -  return buf;
> +  output_asm_insn (buf, operands);
> +  return "";
>  }
>[(set_attr "type" "sselog")
> (set_attr "prefix" "evex")
> @@ -3314,7 +3316,7 @@ (define_insn "*3"
>"TARGET_SSE && 
> && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
>  {
> -  static char buf[128];
> +  char buf[128];
>const char *ops;
>const char *suffix;
>
> @@ -3349,7 +3351,8 @@ (define_insn "*3"
>  }
>
>snprintf (buf, sizeof (buf), ops, suffix);
> -  return buf;
> +  output_asm_insn (buf, operands);
> +  return "";
>  }
>[(set_attr "isa" "noavx,avx,avx512dq,avx512f")
> (set_attr "type" "sselog")
> @@ -3378,7 +3381,7 @@ (define_insn "*3"
> (match_operand:VF_512 2 "nonimmediate_operand" "vm")))]
>"TARGET_AVX512F && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
>  {
> -  static char buf[128];
> +  char buf[128];
>const char *ops;
>const char *suffix;
>
> @@ -3395,7 +3398,8 @@ (define_insn "*3"
>snprin

[PR 89209] Avoid segfault in a peculiar corner case in SRA

2019-02-16 Thread Martin Jambor
Hi,

PR 89209 takes place because SRA on trunk encounters an assignment into
an SSA_NAME from a V_C_E of a structure load which however cannot
contain any useful data because (it is not addressable and) there is no
store to that portion of the aggregate in the entire function.  In such
circumstances, SRA conjures up a default-definition SSA name and
replaces the RHS of the load with it so that an uninitialized warning is
generated.  Unfortunately, the code digging through V_C_Es badly
interacts with this and what happens is that first we create an
aggregate type SSA name which the code avoiding creation of additional
V_C_Es then tries to store "into" the SSA name on the LHS, which of
course fails.  BTW, I was surprised no verifier caught the aggregate SSA
name if I just avoided the segfaulting path.

Fixed with the patch below which gives the code creating the
default-definition SSA_NAME an alternative type which is used if the
access type is not a gimple register type.  I have also added an
additional test that lacc is not NULL to sra_modify_assign because the
code path could trigger if the created default-def SSA_NAME happens to
be loaded as two different types.  However, I have not managed to
quickly create a testcase that would lead to it.

Bootstrapped and tested on x86_64-linux.  OK for trunk?

Thanks,

Martin


2019-02-15  Martin Jambor  

PR tree-optimization/89209
* tree-sra.c (create_access_replacement): New optional parameter
reg_type.  Use it as a type if non-NULL and access type is not of
a register type.
(get_repl_default_def_ssa_name): New parameter REG_TYPE, pass it
to create_access_replacement.
(sra_modify_assign): Pass LHS type to get_repl_default_def_ssa_name.
Check lacc is non-NULL before attempting to re-create it on the RHS.

testsuite/
* gcc.dg/tree-ssa/pr89209.c: New test.
---
 gcc/testsuite/gcc.dg/tree-ssa/pr89209.c | 16 
 gcc/tree-sra.c  | 34 +++--
 2 files changed, 37 insertions(+), 13 deletions(-)
 create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/pr89209.c

diff --git a/gcc/testsuite/gcc.dg/tree-ssa/pr89209.c 
b/gcc/testsuite/gcc.dg/tree-ssa/pr89209.c
new file mode 100644
index 000..f01bda9ae5c
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/tree-ssa/pr89209.c
@@ -0,0 +1,16 @@
+/* { dg-do compile } */
+/* { dg-options "-O2" } */
+
+struct S {
+  short a, b;
+};
+struct T {
+  int c;
+  struct S s;
+};
+int f ()
+{
+  struct T t;
+  t.c = t.s.a || t.s.b;
+  return t.c;
+}
diff --git a/gcc/tree-sra.c b/gcc/tree-sra.c
index e4851daaa3f..eeef31ba496 100644
--- a/gcc/tree-sra.c
+++ b/gcc/tree-sra.c
@@ -2195,13 +2195,20 @@ sort_and_splice_var_accesses (tree var)
 
 /* Create a variable for the given ACCESS which determines the type, name and a
few other properties.  Return the variable declaration and store it also to
-   ACCESS->replacement.  */
+   ACCESS->replacement.  REG_TYPE is used when creating a declaration to base a
+   default-definition SSA name on in order to facilitate an uninitialized
+   warning.  It is used instead of the actual ACCESS type if that is not of a
+   gimple register type.  */
 
 static tree
-create_access_replacement (struct access *access)
+create_access_replacement (struct access *access, tree reg_type = NULL_TREE)
 {
   tree repl;
 
+  tree type = access->type;
+  if (reg_type && !is_gimple_reg_type (type))
+type = reg_type;
+
   if (access->grp_to_be_debug_replaced)
 {
   repl = create_tmp_var_raw (access->type);
@@ -2210,17 +2217,16 @@ create_access_replacement (struct access *access)
   else
 /* Drop any special alignment on the type if it's not on the main
variant.  This avoids issues with weirdo ABIs like AAPCS.  */
-repl = create_tmp_var (build_qualified_type
-(TYPE_MAIN_VARIANT (access->type),
- TYPE_QUALS (access->type)), "SR");
-  if (TREE_CODE (access->type) == COMPLEX_TYPE
-  || TREE_CODE (access->type) == VECTOR_TYPE)
+repl = create_tmp_var (build_qualified_type (TYPE_MAIN_VARIANT (type),
+TYPE_QUALS (type)), "SR");
+  if (TREE_CODE (type) == COMPLEX_TYPE
+  || TREE_CODE (type) == VECTOR_TYPE)
 {
   if (!access->grp_partial_lhs)
DECL_GIMPLE_REG_P (repl) = 1;
 }
   else if (access->grp_partial_lhs
-  && is_gimple_reg_type (access->type))
+  && is_gimple_reg_type (type))
 TREE_ADDRESSABLE (repl) = 1;
 
   DECL_SOURCE_LOCATION (repl) = DECL_SOURCE_LOCATION (access->base);
@@ -3450,15 +3456,16 @@ sra_modify_constructor_assign (gimple *stmt, 
gimple_stmt_iterator *gsi)
 
 /* Create and return a new suitable default definition SSA_NAME for RACC which
is an access describing an uninitialized part of an aggregate that is being
-   loaded.  */
+   loaded.  REG_TREE is used instead of the actual RACC type if that is not o

Re: [PATCH] Teach evrp that main's argc argument is always non-negative for C family (PR tree-optimization/89350)

2019-02-16 Thread Richard Biener
On February 16, 2019 8:12:34 AM GMT+01:00, Jakub Jelinek  
wrote:
>Hi!
>
>Both the C and C++ standard guarantee that the argc argument to main is
>non-negative, the following patch sets (or adjusts) the corresponding
>SSA_NAME_RANGE_INFO.  While main is just one, with IPA VRP it can also
>propagate etc.  I had to change one testcase because it started
>optimizing
>it better (the test has been folded away), so no sinking was done.

Can we handle this in _nonnegative_p? Also make it work independent of 
langhooks by looking up the translation unit decl from cfun via walking 
contexts? 

Is this a regression?

Richard. 

>Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk?
>
>2019-02-16  Jakub Jelinek  
>
>   PR tree-optimization/89350
>   * gimple-ssa-evrp.c: Include tree-dfa.h and langhooks.h.
>   (maybe_set_main_argc_range): New function.
>   (execute_early_vrp): Call it.
>
>   * gcc.dg/tree-ssa/vrp122.c: New test.
>   * gcc.dg/tree-ssa/ssa-sink-3.c (main): Rename to ...
>   (bar): ... this.
>
>--- gcc/gimple-ssa-evrp.c.jj   2019-01-01 12:37:15.712998659 +0100
>+++ gcc/gimple-ssa-evrp.c  2019-02-15 09:49:56.768534668 +0100
>@@ -41,6 +41,8 @@ along with GCC; see the file COPYING3.
> #include "tree-cfgcleanup.h"
> #include "vr-values.h"
> #include "gimple-ssa-evrp-analyze.h"
>+#include "tree-dfa.h"
>+#include "langhooks.h"
> 
> class evrp_folder : public substitute_and_fold_engine
> {
>@@ -291,6 +293,39 @@ evrp_dom_walker::cleanup (void)
>   evrp_folder.vr_values->cleanup_edges_and_switches ();
> }
> 
>+/* argc in main in C/C++ is guaranteed to be non-negative.  Adjust the
>+   range info for it.  */
>+
>+static void
>+maybe_set_main_argc_range (void)
>+{
>+  if (!DECL_ARGUMENTS (current_function_decl)
>+  || !(lang_GNU_C () || lang_GNU_CXX () || lang_GNU_OBJC ()))
>+return;
>+
>+  tree argc = DECL_ARGUMENTS (current_function_decl);
>+  if (TYPE_MAIN_VARIANT (TREE_TYPE (argc)) != integer_type_node)
>+return;
>+
>+  argc = ssa_default_def (cfun, argc);
>+  if (argc == NULL_TREE)
>+return;
>+
>+  wide_int min, max;
>+  value_range_kind kind = get_range_info (argc, &min, &max);
>+  if (kind == VR_VARYING)
>+{
>+  min = wi::zero (TYPE_PRECISION (integer_type_node));
>+  max = wi::to_wide (TYPE_MAX_VALUE (integer_type_node));
>+}
>+  else if (kind == VR_RANGE && wi::neg_p (min) && !wi::neg_p (max))
>+min = wi::zero (TYPE_PRECISION (integer_type_node));
>+  else
>+return;
>+
>+  set_range_info (argc, VR_RANGE, min, max);
>+}
>+
>/* Main entry point for the early vrp pass which is a simplified
>non-iterative
>version of vrp where basic blocks are visited in dominance order. 
>Value
>ranges discovered in early vrp will also be used by ipa-vrp.  */
>@@ -307,6 +342,10 @@ execute_early_vrp ()
>   scev_initialize ();
>   calculate_dominance_info (CDI_DOMINATORS);
> 
>+  /* argc in main in C/C++ is guaranteed to be non-negative.  */
>+  if (MAIN_NAME_P (DECL_NAME (current_function_decl)))
>+maybe_set_main_argc_range ();
>+
>   /* Walk stmts in dominance order and propagate VRP.  */
>   evrp_dom_walker walker;
>   walker.walk (ENTRY_BLOCK_PTR_FOR_FN (cfun));
>--- gcc/testsuite/gcc.dg/tree-ssa/vrp122.c.jj  2019-02-15
>09:54:07.016357759 +0100
>+++ gcc/testsuite/gcc.dg/tree-ssa/vrp122.c 2019-02-15
>09:53:59.299486561 +0100
>@@ -0,0 +1,14 @@
>+/* PR tree-optimization/89350 */
>+/* { dg-do compile } */
>+/* { dg-options "-O2 -fdump-tree-optimized" } */
>+/* { dg-final { scan-tree-dump-not "link_error \\\(" "optimized" } }
>*/
>+
>+extern void link_error (void);
>+
>+int
>+main (int argc, const char *argv[])
>+{
>+  if (argc < 0)
>+link_error ();
>+  return 0;
>+}
>--- gcc/testsuite/gcc.dg/tree-ssa/ssa-sink-3.c.jj  2015-05-29
>15:03:44.947546711 +0200
>+++ gcc/testsuite/gcc.dg/tree-ssa/ssa-sink-3.c 2019-02-16
>08:04:29.951126611 +0100
>@@ -2,7 +2,7 @@
> /* { dg-options "-O2 -fdump-tree-sink-stats" } */
> extern void foo(int a);
> int
>-main (int argc)
>+bar (int argc)
> {
>   int a;
>   a = argc + 1;
>
>   Jakub



Re: [PATCH] Improve mem = STRING_CST expansion (PR rtl-optimization/66152)

2019-02-16 Thread Richard Biener
On February 16, 2019 8:19:06 AM GMT+01:00, Jakub Jelinek  
wrote:
>Hi!
>
>On the following testcase, we've regressed in bar since 8.x, in 8.x
>store merging came up with mem = 64-bit constant, but starting with the
>change to transform {0,1,2,3,4,5,6,7} char initializers into
>STRING_CSTs,
>we don't do that anymore.  The mem = STRING_CST expansion can do that,
>but only if there are no embedded zeros.  The following patch improves
>it even for embedded zeros, by using a new callback for the
>can_store_by_pieces/store_by_pieces calls which knows how to handle
>STRING_CST.  We don't need strlen in that case, can use TREE_STRING_LENGTH
>instead.  Additionally, if the STRING_CST is slightly shorter than the
>destination region, it might generate better code by trying to
>store_by_pieces it all in one go (bytes from STRING_CST until the last
>one,
>followed by artificially added zeros) and only if that doesn't seem to
>be
>beneficial (e.g. very small STRING_CST followed by kilobytes of zeros)
>goes for the store_by_pieces of STRING_CST (rounded up to next
>STORE_MAX_PIECES) followed by a clear_storage.
>
>Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk?

OK. 

Richard. 

>2019-02-16  Jakub Jelinek  
>
>   PR rtl-optimization/66152
>   * builtins.h (c_readstr): Declare.
>   * builtins.c (c_readstr): Remove forward declaration.  Add
>   null_terminated_p argument, if false, read all bytes from the
>   string instead of stopping after '\0'.
>   * expr.c (string_cst_read_str): New function.
>   (store_expr): Use string_cst_read_str instead of
>   builtin_strncpy_read_str.  Try to store by pieces the whole
>   exp_len first, and only if that fails, split it up into
>   store by pieces followed by clear_storage.  Formatting fix.
>
>   * gcc.target/i386/pr66152.c: New test.
>
>--- gcc/builtins.h.jj  2019-02-14 08:06:37.878546571 +0100
>+++ gcc/builtins.h 2019-02-15 11:33:50.208180171 +0100
>@@ -103,6 +103,7 @@ struct c_strlen_data
> };
> 
>extern tree c_strlen (tree, int, c_strlen_data * = NULL, unsigned = 1);
>+extern rtx c_readstr (const char *, scalar_int_mode, bool = true);
> extern void expand_builtin_setjmp_setup (rtx, rtx);
> extern void expand_builtin_setjmp_receiver (rtx);
> extern void expand_builtin_update_setjmp_buf (rtx);
>--- gcc/builtins.c.jj  2019-02-11 20:58:48.509965578 +0100
>+++ gcc/builtins.c 2019-02-15 11:37:00.046029652 +0100
>@@ -95,7 +95,6 @@ builtin_info_type builtin_info[(int)END_
> /* Non-zero if __builtin_constant_p should be folded right away.  */
> bool force_folding_builtin_constant_p;
> 
>-static rtx c_readstr (const char *, scalar_int_mode);
> static int target_char_cast (tree, char *);
> static rtx get_memory_rtx (tree, tree);
> static int apply_args_size (void);
>@@ -802,10 +801,14 @@ c_strlen (tree src, int only_value, c_st
> }
> 
> /* Return a constant integer corresponding to target reading
>-   GET_MODE_BITSIZE (MODE) bits from string constant STR.  */
>-
>-static rtx
>-c_readstr (const char *str, scalar_int_mode mode)
>+   GET_MODE_BITSIZE (MODE) bits from string constant STR.  If
>+   NULL_TERMINATED_P, reading stops after '\0' character, all further
>ones
>+   are assumed to be zero, otherwise it reads as many characters
>+   as needed.  */
>+
>+rtx
>+c_readstr (const char *str, scalar_int_mode mode,
>+ bool null_terminated_p/*=true*/)
> {
>   HOST_WIDE_INT ch;
>   unsigned int i, j;
>@@ -830,7 +833,7 @@ c_readstr (const char *str, scalar_int_m
>   j = j + UNITS_PER_WORD - 2 * (j % UNITS_PER_WORD) - 1;
>   j *= BITS_PER_UNIT;
> 
>-  if (ch)
>+  if (ch || !null_terminated_p)
>   ch = (unsigned char) str[i];
> tmp[j / HOST_BITS_PER_WIDE_INT] |= ch << (j % HOST_BITS_PER_WIDE_INT);
> }
>--- gcc/expr.c.jj  2019-02-08 20:00:40.309835608 +0100
>+++ gcc/expr.c 2019-02-15 11:37:18.715719809 +0100
>@@ -5453,6 +5453,30 @@ emit_storent_insn (rtx to, rtx from)
>   return maybe_expand_insn (code, 2, ops);
> }
> 
>+/* Helper function for store_expr storing of STRING_CST.  */
>+
>+static rtx
>+string_cst_read_str (void *data, HOST_WIDE_INT offset, scalar_int_mode
>mode)
>+{
>+  tree str = (tree) data;
>+
>+  gcc_assert (offset >= 0);
>+  if (offset >= TREE_STRING_LENGTH (str))
>+return const0_rtx;
>+
>+  if ((unsigned HOST_WIDE_INT) offset + GET_MODE_SIZE (mode)
>+  > (unsigned HOST_WIDE_INT) TREE_STRING_LENGTH (str))
>+{
>+  char *p = XALLOCAVEC (char, GET_MODE_SIZE (mode));
>+  size_t l = TREE_STRING_LENGTH (str) - offset;
>+  memcpy (p, TREE_STRING_POINTER (str) + offset, l);
>+  memset (p + l, '\0', GET_MODE_SIZE (mode) - l);
>+  return c_readstr (p, mode, false);
>+}
>+
>+  return c_readstr (TREE_STRING_POINTER (str) + offset, mode, false);
>+}
>+
> /* Generate code for computing expression EXP,
>and storing the value into TARGET.
> 
>@@ -5472,7 +5496,7 @@ emit_storent_insn (rtx to, rtx from)
> 
> rtx
> store_expr (tree exp, rtx target, int cal

Re: [PR 89209] Avoid segfault in a peculiar corner case in SRA

2019-02-16 Thread Richard Biener
On February 16, 2019 11:56:13 AM GMT+01:00, Martin Jambor  
wrote:
>Hi,
>
>PR 89209 takes place because SRA on trunk encounters an assignment into
>an SSA_NAME from a V_C_E of a structure load which however cannot
>contain any useful data because (it is not addressable and) there is no
>store to that portion of the aggregate in the entire function.  In such
>circumstances, SRA conjures up a default-definition SSA name and
>replaces the RHS of the load with it so that an uninitialized warning
>is
>generated.  Unfortunately, the code digging through V_C_Es badly
>interacts with this and what happens is that first we create an
>aggregate type SSA name which the code avoiding creation of additional
>V_C_Es then tries to store "into" the SSA name on the LHS, which of
>course fails.  BTW, I was surprised no verifier caught the aggregate
>SSA
>name if I just avoided the segfaulting path.
>
>Fixed with the patch below which gives the code creating the
>default-definition SSA_NAME an alternative type which is used if the
>access type is not a gimple_register_type.  I have also added an
>additional test that lacc is not NULL to sra_modify_assign because the
>code path could trigger if the created default-def SSA_NAME happens to
>be loaded as two different types.  However, I have not managed to
>quickly create a testcase that would lead to it..
>
>Bootstrapped and tested on x86_64-linux.  OK for trunk?

OK. 

Richard. 

>Thanks,
>
>Martin
>
>
>2019-02-15  Martin Jambor  
>
>   PR tree-optimization/89209
>   * tree-sra.c (create_access_replacement): New optional parameter
>   reg_tree.  Use it as a type if non-NULL and access type is not of
>   a register type.
>   (get_repl_default_def_ssa_name): New parameter REG_TYPE, pass it
>   to create_access_replacement.
>   (sra_modify_assign): Pass LHS type to get_repl_default_def_ssa_name.
>   Check lacc is non-NULL before attempting to re-create it on the RHS.
>
>   testsuite/
>   * gcc.dg/tree-ssa/pr89209.c: New test.
>---
> gcc/testsuite/gcc.dg/tree-ssa/pr89209.c | 16 
> gcc/tree-sra.c  | 34 +++--
> 2 files changed, 37 insertions(+), 13 deletions(-)
> create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/pr89209.c
>
>diff --git a/gcc/testsuite/gcc.dg/tree-ssa/pr89209.c
>b/gcc/testsuite/gcc.dg/tree-ssa/pr89209.c
>new file mode 100644
>index 000..f01bda9ae5c
>--- /dev/null
>+++ b/gcc/testsuite/gcc.dg/tree-ssa/pr89209.c
>@@ -0,0 +1,16 @@
>+/* { dg-do compile } */
>+/* { dg-options "-O2" } */
>+
>+struct S {
>+  short a, b;
>+};
>+struct T {
>+  int c;
>+  struct S s;
>+};
>+int f ()
>+{
>+  struct T t;
>+  t.c = t.s.a || t.s.b;
>+  return t.c;
>+}
>diff --git a/gcc/tree-sra.c b/gcc/tree-sra.c
>index e4851daaa3f..eeef31ba496 100644
>--- a/gcc/tree-sra.c
>+++ b/gcc/tree-sra.c
>@@ -2195,13 +2195,20 @@ sort_and_splice_var_accesses (tree var)
> 
>/* Create a variable for the given ACCESS which determines the type,
>name and a
>few other properties.  Return the variable declaration and store it
>also to
>-   ACCESS->replacement.  */
>+   ACCESS->replacement.  REG_TREE is used when creating a declaration
>to base a
>+   default-definition SSA name on on in order to facilitate an
>uninitialized
>+   warning.  It is used instead of the actual ACCESS type if that is
>not of a
>+   gimple register type.  */
> 
> static tree
>-create_access_replacement (struct access *access)
>+create_access_replacement (struct access *access, tree reg_type =
>NULL_TREE)
> {
>   tree repl;
> 
>+  tree type = access->type;
>+  if (reg_type && !is_gimple_reg_type (type))
>+type = reg_type;
>+
>   if (access->grp_to_be_debug_replaced)
> {
>   repl = create_tmp_var_raw (access->type);
>@@ -2210,17 +2217,16 @@ create_access_replacement (struct access
>*access)
>   else
> /* Drop any special alignment on the type if it's not on the main
>variant.  This avoids issues with weirdo ABIs like AAPCS.  */
>-repl = create_tmp_var (build_qualified_type
>-   (TYPE_MAIN_VARIANT (access->type),
>-TYPE_QUALS (access->type)), "SR");
>-  if (TREE_CODE (access->type) == COMPLEX_TYPE
>-  || TREE_CODE (access->type) == VECTOR_TYPE)
>+repl = create_tmp_var (build_qualified_type (TYPE_MAIN_VARIANT
>(type),
>+   TYPE_QUALS (type)), "SR");
>+  if (TREE_CODE (type) == COMPLEX_TYPE
>+  || TREE_CODE (type) == VECTOR_TYPE)
> {
>   if (!access->grp_partial_lhs)
>   DECL_GIMPLE_REG_P (repl) = 1;
> }
>   else if (access->grp_partial_lhs
>- && is_gimple_reg_type (access->type))
>+ && is_gimple_reg_type (type))
> TREE_ADDRESSABLE (repl) = 1;
> 
>   DECL_SOURCE_LOCATION (repl) = DECL_SOURCE_LOCATION (access->base);
>@@ -3450,15 +3456,16 @@ sra_modify_constructor_assign (gimple *stmt,
>gimple_stmt_iterator *gsi)
> 
>/* Create and return a new suitable default definition SSA

Re: [PATCH] Teach evrp that main's argc argument is always non-negative for C family (PR tree-optimization/89350)

2019-02-16 Thread Jakub Jelinek
On Sat, Feb 16, 2019 at 12:10:22PM +0100, Richard Biener wrote:
> On February 16, 2019 8:12:34 AM GMT+01:00, Jakub Jelinek  
> wrote:
> >Both the C and C++ standard guarantee that the argc argument to main is
> >non-negative, the following patch sets (or adjusts) the corresponding
> >SSA_NAME_RANGE_INFO.  While main is just one, with IPA VRP it can also
> >propagate etc.  I had to change one testcase because it started
> >optimizing
> >it better (the test has been folded away), so no sinking was done.
> 
> Can we handle this in _nonnegative_p?  Also make it work independent of

We could.  But I was wondering if that wouldn't be too costly to check it
there for each SSA_NAME tested (at least something like is this a default
definition SSA_NAME of a PARM_DECL in MAIN_DECL_P (DECL_NAME
(current_function_decl))).  Furthermore, I think VRP doesn't use the
*_nonnegative_*p APIs except for gimple_stmt_nonnegative_warnv_p
and so it would need to be repeated in VRP anyway.  I vaguely remember we
didn't want to use value ranges in *nonnegative_*_p and *nonnegative_*_p
in VRP because that would cause weird behavior.

> langhooks by looking up the translation unit decl from cfun via walking
> contexts?

It is very well possible that MAIN_DECL_P is true only for actual main
that _start calls even for other languages, I know for sure that
it is the case of Fortran, just haven't analyzed what exactly is Ada doing
(it has some main_identifier_node code in there), or Go, D etc.

> Is this a regression?

The patch fixes the regression on #c0 in that PR, though admittedly only
if it is in main and not in some other random function with random int
argument.  So, if you want to defer this for GCC 10, I can surely wait with
that.

Jakub


[RS6000] Fix _ and tf_ splitters

2019-02-16 Thread Alan Modra
This patch fixes a bug that can result in "insn does not satisfy its
constraints" if these splitters fire due to not getting ctr for the
jump insn.  Since the jump insn can have any of r,m,d,wi,c,l as the
decremented count output, it's not sufficient to check for
gpc_reg_operand (which matches VSX regs for example).  Seen after
correcting register_move_cost when the cost of gpr <-> vsx is much
lower.  Since this is a prerequisite to fixing PR89271, I'm mentioning
that PR in the ChangeLog.

The tf_ split had a further bug in that it wouldn't match
if the count output was m,d,wi, or l.

Bootstrapped etc. powerpc64le-linux.  OK?

PR target/89271
* config/rs6000/rs6000.md (_ split): Check for an int
output reg on add insn.
(tf_ split): Likewise.  Match predicates with insn.

diff --git a/gcc/config/rs6000/rs6000.md b/gcc/config/rs6000/rs6000.md
index 9dd222dc5d7..51b6b7a9660 100644
--- a/gcc/config/rs6000/rs6000.md
+++ b/gcc/config/rs6000/rs6000.md
@@ -12525,7 +12525,7 @@ (define_split
const0_rtx);
   emit_insn (gen_rtx_SET (operands[3],
  gen_rtx_COMPARE (CCmode, operands[1], const1_rtx)));
-  if (gpc_reg_operand (operands[0], mode))
+  if (int_reg_operand (operands[0], mode))
 emit_insn (gen_add3 (operands[0], operands[1], constm1_rtx));
   else
 {
@@ -12603,7 +12603,7 @@ (define_split
   (const_int 0)]))
  (match_operand 4)
  (match_operand 5)))
-   (set (match_operand:P 6 "int_reg_operand")
+   (set (match_operand:P 6 "nonimmediate_operand")
(plus:P (match_dup 0)
(const_int -1)))
(clobber (match_scratch:P 7))
@@ -12636,7 +12636,7 @@ (define_split
   else
  emit_insn (gen_cceq_ior_compare_complement (operands[9], andexpr, 
ctrcmpcc,
 operands[8], cccmp, ccin));
-  if (gpc_reg_operand (operands[0], mode))
+  if (int_reg_operand (ctrout, mode))
  emit_insn (gen_add3 (ctrout, ctr, constm1_rtx));
   else
 {

-- 
Alan Modra
Australia Development Lab, IBM


Re: [PATCH] Add target-zlib to top-level configure, use zlib from libphobos

2019-02-16 Thread Matthias Klose
On 12.02.19 21:54, Iain Buclaw wrote:
> On Tue, 12 Feb 2019 at 10:40, Richard Biener  
> wrote:
>>
>> On Sat, Feb 9, 2019 at 10:37 AM Iain Buclaw  wrote:
>>>
>>> On Mon, 28 Jan 2019 at 13:10, Richard Biener  
>>> wrote:

 On Mon, Jan 21, 2019 at 7:35 PM Iain Buclaw  wrote:
>
> Hi,
>
> Following on from the last, this adds target-zlib to target_libraries
> and updates libphobos build scripts to link to libz_convenience.a.
> The D front-end already has target-zlib in d/config-lang.in.
>
> Is the top-level part OK?  I considered disabling target-zlib if
> libphobos is not being built, but decided against unless it's
> requested.

 Hmm, you overload --with-system-zlib to apply to both host and target
 (I guess it already applied to build), not sure if that's really desired?
 I suppose libphobos is the first target library linking against zlib?

>>>
>>> Originally, libgcj linked to zlib.
>>>
 You are also falling back to in-tree zlib if --with-system-zlib was
 specified but no zlib was found - I guess for cross builds that
 will easily go unnoticed...  The toplevel --with-system-zlib makes
 it much harder and simply fails.

>>>
>>> OK, so keep --with-target-system-zlib to distinguish between the two?
>>
>> Yes, and fail if specified but not found.
>>
> 
> Updated patch.  Checked that it correctly fails when
> --with-target-system-zlib and zlib missing.

For the GC enabled libobjc I added an --enable-objc-gc=auto to fall back to the
in-tree library when no system library is found. Could you do the same for zlib?
There might be some multilib variants missing in distros.

Matthias


Re: [PATCH, RFC] Avoid the -D option which is not available install-sh

2019-02-16 Thread Bernd Edlinger
On 2/9/19 7:21 PM, Bernd Edlinger wrote:
> On 2/9/19 7:18 PM, Jakub Jelinek wrote:
>> On Sat, Feb 09, 2019 at 06:11:00PM +, Bernd Edlinger wrote:
>>> --- libphobos/libdruntime/Makefile.am   (revision 268614)
>>> +++ libphobos/libdruntime/Makefile.am   (working copy)
>>> @@ -140,10 +140,12 @@ clean-local:
>>>  # Handles generated files as well
>>>  install-data-local:
>>> for file in $(ALL_DRUNTIME_INSTALL_DSOURCES); do \
>>> + $(MKDIR_P) `echo $(DESTDIR)$(gdc_include_dir)/$$file \
>>> + | sed -e 's:/[^/]*$$::'` ; \
>>
>> Perhaps better `dirname $(DESTDIR)$(gdc_include_dir)/$$file` ?
>>
> 
> Ah, yes, good point.
> 
> Consider it changed.
> 
> 

So here is the latest version with the requested change.

What is the procedure for libphobos patches?
Can we check them into the gcc svn, or will Iain have to
push them upstream first?


Thanks
Bernd.
2019-01-31  Bernd Edlinger  

	* src/Makefile.am: Avoid the -D option which is not available
	with the install-sh fallback.  Use $(MKDIR_P) instead.
	* libdruntime/Makefile.am: Likewise.
	* configure: Regenerated.
	* Makefile.in: Regenerated.
	* src/Makefile.in: Regenerated.
	* libdruntime/Makefile.in: Regenerated.
	* testsuite/Makefile.in: Regenerated.

Index: libphobos/Makefile.in
===
--- libphobos/Makefile.in	(revision 268718)
+++ libphobos/Makefile.in	(working copy)
@@ -15,7 +15,7 @@
 @SET_MAKE@
 
 # Makefile for the toplevel directory of the D Standard library.
-# Copyright (C) 2006-2018 Free Software Foundation, Inc.
+# Copyright (C) 2006-2019 Free Software Foundation, Inc.
 #
 # GCC is free software; you can redistribute it and/or modify
 # it under the terms of the GNU General Public License as published by
@@ -319,7 +319,6 @@ phobos_compiler_shared_flag = @phobos_compiler_sha
 prefix = @prefix@
 program_transform_name = @program_transform_name@
 psdir = @psdir@
-runstatedir = @runstatedir@
 sbindir = @sbindir@
 sharedstatedir = @sharedstatedir@
 srcdir = @srcdir@
Index: libphobos/configure
===
--- libphobos/configure	(revision 268718)
+++ libphobos/configure	(working copy)
@@ -782,7 +782,6 @@ infodir
 docdir
 oldincludedir
 includedir
-runstatedir
 localstatedir
 sharedstatedir
 sysconfdir
@@ -868,7 +867,6 @@ datadir='${datarootdir}'
 sysconfdir='${prefix}/etc'
 sharedstatedir='${prefix}/com'
 localstatedir='${prefix}/var'
-runstatedir='${localstatedir}/run'
 includedir='${prefix}/include'
 oldincludedir='/usr/include'
 docdir='${datarootdir}/doc/${PACKAGE_TARNAME}'
@@ -1121,15 +1119,6 @@ do
   | -silent | --silent | --silen | --sile | --sil)
 silent=yes ;;
 
-  -runstatedir | --runstatedir | --runstatedi | --runstated \
-  | --runstate | --runstat | --runsta | --runst | --runs \
-  | --run | --ru | --r)
-ac_prev=runstatedir ;;
-  -runstatedir=* | --runstatedir=* | --runstatedi=* | --runstated=* \
-  | --runstate=* | --runstat=* | --runsta=* | --runst=* | --runs=* \
-  | --run=* | --ru=* | --r=*)
-runstatedir=$ac_optarg ;;
-
   -sbindir | --sbindir | --sbindi | --sbind | --sbin | --sbi | --sb)
 ac_prev=sbindir ;;
   -sbindir=* | --sbindir=* | --sbindi=* | --sbind=* | --sbin=* \
@@ -1267,7 +1256,7 @@ fi
 for ac_var in	exec_prefix prefix bindir sbindir libexecdir datarootdir \
 		datadir sysconfdir sharedstatedir localstatedir includedir \
 		oldincludedir docdir infodir htmldir dvidir pdfdir psdir \
-		libdir localedir mandir runstatedir
+		libdir localedir mandir
 do
   eval ac_val=\$$ac_var
   # Remove trailing slashes.
@@ -1420,7 +1409,6 @@ Fine tuning of the installation directories:
   --sysconfdir=DIRread-only single-machine data [PREFIX/etc]
   --sharedstatedir=DIRmodifiable architecture-independent data [PREFIX/com]
   --localstatedir=DIR modifiable single-machine data [PREFIX/var]
-  --runstatedir=DIR   modifiable per-process data [LOCALSTATEDIR/run]
   --libdir=DIRobject code libraries [EPREFIX/lib]
   --includedir=DIRC header files [PREFIX/include]
   --oldincludedir=DIR C header files for non-gcc [/usr/include]
@@ -11508,7 +11496,7 @@ else
   lt_dlunknown=0; lt_dlno_uscore=1; lt_dlneed_uscore=2
   lt_status=$lt_dlunknown
   cat > conftest.$ac_ext <<_LT_EOF
-#line 11511 "configure"
+#line 11499 "configure"
 #include "confdefs.h"
 
 #if HAVE_DLFCN_H
@@ -11614,7 +11602,7 @@ else
   lt_dlunknown=0; lt_dlno_uscore=1; lt_dlneed_uscore=2
   lt_status=$lt_dlunknown
   cat > conftest.$ac_ext <<_LT_EOF
-#line 11617 "configure"
+#line 11605 "configure"
 #include "confdefs.h"
 
 #if HAVE_DLFCN_H
Index: libphobos/libdruntime/Makefile.am
===
--- libphobos/libdruntime/Makefile.am	(revision 268718)
+++ libphobos/libdruntime/Makefile.am	(working copy)
@@ -140,10 +140,11 @@ clean-local:
 # Handles generated files as well
 install-data-local:
 	for file in $(ALL_DRUNTIME_INSTALL

*ping* [patch, fortran] Fix part of PR 71066

2019-02-16 Thread Thomas Koenig

Am 10.02.19 um 12:13 schrieb Thomas Koenig:

Hello world,

this patch fixes the coarray part of PR 71066 - handling of data
statements for coarrays.  The PR itself is marked as a 7/8/9
regression.

Regression-tested.  OK for trunk and for backporting?


Ping?

Regards

Thomas


Re: [testsuite] Tweak gcc.target/sparc/struct-ret-check-1.c

2019-02-16 Thread Eric Botcazou
> It cannot pass in PIE mode.

Likewise for the 3 c-c++-common/patchable_function_entry-*.c on SPARC.

Tested on x86-64/Linux and SPARC64/Linux, applied on mainline and 8 branch.


2019-02-16  Eric Botcazou  

* c-c++-common/patchable_function_entry-decl.c: Add -fno-pie on SPARC.
* c-c++-common/patchable_function_entry-default.c: Likewise.
* c-c++-common/patchable_function_entry-definition.c: Likewise.

-- 
Eric Botcazou
Index: c-c++-common/patchable_function_entry-decl.c
===
--- c-c++-common/patchable_function_entry-decl.c	(revision 268932)
+++ c-c++-common/patchable_function_entry-decl.c	(working copy)
@@ -1,5 +1,6 @@
 /* { dg-do compile { target { ! { nvptx*-*-* visium-*-* } } } } */
 /* { dg-options "-O2 -fpatchable-function-entry=3,1" } */
+/* { dg-additional-options "-fno-pie" { target sparc*-*-* } } */
 /* { dg-final { scan-assembler-times "nop|NOP" 2 { target { ! { alpha*-*-* } } } } } */
 /* { dg-final { scan-assembler-times "bis" 2 { target alpha*-*-* } } } */
 
Index: c-c++-common/patchable_function_entry-default.c
===
--- c-c++-common/patchable_function_entry-default.c	(revision 268932)
+++ c-c++-common/patchable_function_entry-default.c	(working copy)
@@ -1,5 +1,6 @@
 /* { dg-do compile { target { ! { nvptx*-*-* visium-*-* } } } } */
 /* { dg-options "-O2 -fpatchable-function-entry=3,1" } */
+/* { dg-additional-options "-fno-pie" { target sparc*-*-* } } */
 /* { dg-final { scan-assembler-times "nop|NOP" 3 { target { ! { alpha*-*-* } } } } } */
 /* { dg-final { scan-assembler-times "bis" 3 { target alpha*-*-* } } } */
 
Index: c-c++-common/patchable_function_entry-definition.c
===
--- c-c++-common/patchable_function_entry-definition.c	(revision 268932)
+++ c-c++-common/patchable_function_entry-definition.c	(working copy)
@@ -1,5 +1,6 @@
 /* { dg-do compile { target { ! { nvptx*-*-* visium-*-* } } } } */
 /* { dg-options "-O2 -fpatchable-function-entry=3,1" } */
+/* { dg-additional-options "-fno-pie" { target sparc*-*-* } } */
 /* { dg-final { scan-assembler-times "nop|NOP" 1 { target { ! { alpha*-*-* } } } } } */
 /* { dg-final { scan-assembler-times "bis" 1 { target alpha*-*-* } } } */
 


Re: [PATCH 27/42] i386: Make _mm_empty () as NOP when MMX is disabled

2019-02-16 Thread H.J. Lu
On Sat, Feb 16, 2019 at 12:58 AM Uros Bizjak  wrote:
>
> On 2/16/19, H.J. Lu  wrote:
> > With SSE emulation of MMX intrinsics, we should make _mm_empty () as NOP
> > when MMX is disabled.
> >
> >   PR target/89021
> >   * config/i386/mmx.md (mmx_): Renamed to ...
> >   (mmx__1): This.
> >   (mmx_): New expander.
> > ---
> >  gcc/config/i386/mmx.md | 29 -
> >  1 file changed, 28 insertions(+), 1 deletion(-)
> >
> > diff --git a/gcc/config/i386/mmx.md b/gcc/config/i386/mmx.md
> > index 9cf0251293a..0f925c0b1ea 100644
> > --- a/gcc/config/i386/mmx.md
> > +++ b/gcc/config/i386/mmx.md
> > @@ -1848,7 +1848,34 @@
> >[(UNSPECV_EMMS "emms")
> > (UNSPECV_FEMMS "femms")])
> >
> > -(define_insn "mmx_"
> > +(define_expand "mmx_"
> > +  [(unspec_volatile [(const_int 0)] EMMS)
> > +   (clobber (reg:XF ST0_REG))
> > +   (clobber (reg:XF ST1_REG))
> > +   (clobber (reg:XF ST2_REG))
> > +   (clobber (reg:XF ST3_REG))
> > +   (clobber (reg:XF ST4_REG))
> > +   (clobber (reg:XF ST5_REG))
> > +   (clobber (reg:XF ST6_REG))
> > +   (clobber (reg:XF ST7_REG))
> > +   (clobber (reg:DI MM0_REG))
> > +   (clobber (reg:DI MM1_REG))
> > +   (clobber (reg:DI MM2_REG))
> > +   (clobber (reg:DI MM3_REG))
> > +   (clobber (reg:DI MM4_REG))
> > +   (clobber (reg:DI MM5_REG))
> > +   (clobber (reg:DI MM6_REG))
> > +   (clobber (reg:DI MM7_REG))]
> > +  "TARGET_MMX || TARGET_MMX_WITH_SSE"
> > +{
> > +   if (TARGET_MMX)
> > + emit_insn (gen_mmx__1 ());
> > +   else
> > + emit_insn (gen_nop ());
> > +   DONE;
>
> The above should be written as:
>
> if (!TARGET_MMX)
>   {
> emit_insn (gen_nop ()));
> DONE;
>   }
>
> > +})
> > +
> > +(define_insn "mmx__1"
>
> The old insn should be renamed to "*mmx_".
>
> Uros.

Tried and got

[hjl@gnu-cfl-2 gcc]$ cat x.c
#include <mmintrin.h>

void
foo (void)
{
  _mm_empty ();
}
[hjl@gnu-cfl-2 gcc]$ ./xgcc -B./ -S x.c -da
x.c: In function ‘foo’:
x.c:7:1: error: unrecognizable insn:
7 | }
  | ^
(insn 5 2 6 2 (unspec_volatile [
(const_int 0 [0])
] UNSPECV_EMMS) "./include/mmintrin.h":60:3 -1
 (nil))
during RTL pass: vregs
dump file: x.c.234r.vregs
x.c:7:1: internal compiler error: in extract_insn, at recog.c:2310
0x10ad84d _fatal_insn(char const*, rtx_def const*, char const*, int,
char const*)
/export/gnu/import/git/gitlab/x86-gcc/gcc/rtl-error.c:108
0x10ad88e _fatal_insn_not_found(rtx_def const*, char const*, int, char const*)
/export/gnu/import/git/gitlab/x86-gcc/gcc/rtl-error.c:116
0x1042abb extract_insn(rtx_insn*)
/export/gnu/import/git/gitlab/x86-gcc/gcc/recog.c:2310
0xc95912 instantiate_virtual_regs_in_insn
/export/gnu/import/git/gitlab/x86-gcc/gcc/function.c:1654
0xc96d44 instantiate_virtual_regs
/export/gnu/import/git/gitlab/x86-gcc/gcc/function.c:1975
0xc96e0e execute
/export/gnu/import/git/gitlab/x86-gcc/gcc/function.c:2024
Please submit a full bug report,
with preprocessed source if appropriate.
Please include the complete backtrace with any bug report.
See <https://gcc.gnu.org/bugs/> for instructions.
[hjl@gnu-cfl-2 gcc]$

;;
;; Full RTL generated for this function:
;;
(note 1 0 3 NOTE_INSN_DELETED)
;; basic block 2, loop depth 0, maybe hot
;;  prev block 0, next block 1, flags: (NEW, REACHABLE, RTL)
;;  pred:   ENTRY (FALLTHRU)
(note 3 1 2 2 [bb 2] NOTE_INSN_BASIC_BLOCK)
(note 2 3 5 2 NOTE_INSN_FUNCTION_BEG)
(insn 5 2 6 2 (unspec_volatile [
(const_int 0 [0])
] UNSPECV_EMMS) "./include/mmintrin.h":60:3 -1
 (nil))
(insn 6 5 7 2 (clobber (reg:XF 8 st)) "./include/mmintrin.h":60:3 -1
 (nil))
(insn 7 6 8 2 (clobber (reg:XF 9 st(1))) "./include/mmintrin.h":60:3 -1
 (nil))
(insn 8 7 9 2 (clobber (reg:XF 10 st(2))) "./include/mmintrin.h":60:3 -1
 (nil))
(insn 9 8 10 2 (clobber (reg:XF 11 st(3))) "./include/mmintrin.h":60:3 -1
 (nil))
(insn 10 9 11 2 (clobber (reg:XF 12 st(4))) "./include/mmintrin.h":60:3 -1
 (nil))
(insn 11 10 12 2 (clobber (reg:XF 13 st(5))) "./include/mmintrin.h":60:3 -1
 (nil))
(insn 12 11 13 2 (clobber (reg:XF 14 st(6))) "./include/mmintrin.h":60:3 -1
 (nil))
(insn 13 12 14 2 (clobber (reg:XF 15 st(7))) "./include/mmintrin.h":60:3 -1
 (nil))
(insn 14 13 15 2 (clobber (reg:DI 28 mm0)) "./include/mmintrin.h":60:3 -1
 (nil))
(insn 15 14 16 2 (clobber (reg:DI 29 mm1)) "./include/mmintrin.h":60:3 -1
 (nil))
(insn 16 15 17 2 (clobber (reg:DI 30 mm2)) "./include/mmintrin.h":60:3 -1
 (nil))
(insn 17 16 18 2 (clobber (reg:DI 31 mm3)) "./include/mmintrin.h":60:3 -1
 (nil))
(insn 18 17 19 2 (clobber (reg:DI 32 mm4)) "./include/mmintrin.h":60:3 -1
 (nil))
(insn 19 18 20 2 (clobber (reg:DI 33 mm5)) "./include/mmintrin.h":60:3 -1
 (nil))
(insn 20 19 21 2 (clobber (reg:DI 34 mm6)) "./include/mmintrin.h":60:3 -1
 (nil))
(insn 21 20 25 2 (clobber (reg:DI 35 mm7)) "./include/mmintrin.h":60:3 -1
 (nil))
(insn 25 21 0 2 (const_int 0 [0]) "./include/mmintrin.h":61:1 -1
 (nil))
;;  succ:   EXIT [always]  (FALLTHRU)



-- 
H.J.

Re: [PATCH 17/42] i386: Emulate MMX mmx_pinsrw with SSE

2019-02-16 Thread H.J. Lu
On Sat, Feb 16, 2019 at 1:08 AM Uros Bizjak  wrote:
>
> On 2/16/19, H.J. Lu  wrote:
> > Emulate MMX mmx_pinsrw with SSE.  Only SSE register source operand is
> > allowed.
>
> Here we allow general register and memory operands in both cases, so
> the above sentence is misleading.

Changed to

Emulate MMX mmx_pinsrw with SSE.  Only SSE register destination operand
is allowed.

> Uros.
>
> >
> >   PR target/89021
> >   * config/i386/mmx.md (mmx_pinsrw): Also check TARGET_MMX and
> >   TARGET_MMX_WITH_SSE.
> >   (*mmx_pinsrw): Add SSE emulation.
> > ---
> >  gcc/config/i386/mmx.md | 33 +++--
> >  1 file changed, 23 insertions(+), 10 deletions(-)
> >
> > diff --git a/gcc/config/i386/mmx.md b/gcc/config/i386/mmx.md
> > index 22547c7da6f..1e68d1bb338 100644
> > --- a/gcc/config/i386/mmx.md
> > +++ b/gcc/config/i386/mmx.md
> > @@ -1282,32 +1282,45 @@
> >  (match_operand:SI 2 "nonimmediate_operand"))
> > (match_operand:V4HI 1 "register_operand")
> >(match_operand:SI 3 "const_0_to_3_operand")))]
> > -  "TARGET_SSE || TARGET_3DNOW_A"
> > +  "(TARGET_MMX || TARGET_MMX_WITH_SSE)
> > +   && (TARGET_SSE || TARGET_3DNOW_A)"
> >  {
> >operands[2] = gen_lowpart (HImode, operands[2]);
> >operands[3] = GEN_INT (1 << INTVAL (operands[3]));
> >  })
> >
> >  (define_insn "*mmx_pinsrw"
> > -  [(set (match_operand:V4HI 0 "register_operand" "=y")
> > +  [(set (match_operand:V4HI 0 "register_operand" "=y,x,Yv")
> >  (vec_merge:V4HI
> >(vec_duplicate:V4HI
> > -(match_operand:HI 2 "nonimmediate_operand" "rm"))
> > -   (match_operand:V4HI 1 "register_operand" "0")
> > +(match_operand:HI 2 "nonimmediate_operand" "rm,rm,rm"))
> > +   (match_operand:V4HI 1 "register_operand" "0,0,Yv")
> >(match_operand:SI 3 "const_int_operand")))]
> > -  "(TARGET_SSE || TARGET_3DNOW_A)
> > +  "(TARGET_MMX || TARGET_MMX_WITH_SSE)
> > +   && (TARGET_SSE || TARGET_3DNOW_A)
> > && ((unsigned) exact_log2 (INTVAL (operands[3]))
> > < GET_MODE_NUNITS (V4HImode))"
> >  {
> >operands[3] = GEN_INT (exact_log2 (INTVAL (operands[3])));
> > -  if (MEM_P (operands[2]))
> > -return "pinsrw\t{%3, %2, %0|%0, %2, %3}";
> > +  if (TARGET_MMX_WITH_SSE && TARGET_AVX)
> > +{
> > +  if (MEM_P (operands[2]))
> > + return "vpinsrw\t{%3, %2, %1, %0|%0, %1, %2, %3}";
> > +  else
> > + return "vpinsrw\t{%3, %k2, %1, %0|%0, %1, %k2, %3}";
> > +}
> >else
> > -return "pinsrw\t{%3, %k2, %0|%0, %k2, %3}";
> > +{
> > +  if (MEM_P (operands[2]))
> > + return "pinsrw\t{%3, %2, %0|%0, %2, %3}";
> > +  else
> > + return "pinsrw\t{%3, %k2, %0|%0, %k2, %3}";
> > +}
> >  }
> > -  [(set_attr "type" "mmxcvt")
> > +  [(set_attr "mmx_isa" "native,x64_noavx,x64_avx")
> > +   (set_attr "type" "mmxcvt,sselog,sselog")
> > (set_attr "length_immediate" "1")
> > -   (set_attr "mode" "DI")])
> > +   (set_attr "mode" "DI,TI,TI")])
> >
> >  (define_insn "mmx_pextrw"
> >[(set (match_operand:SI 0 "register_operand" "=r,r")
> > --
> > 2.20.1
> >
> >



-- 
H.J.


V2 [PATCH] i386: Insert ENDBR for NOTE_INSN_DELETED_LABEL only if needed

2019-02-16 Thread H.J. Lu
On Thu, Feb 14, 2019 at 08:13:32PM -0800, H.J. Lu wrote:
> NOTE_INSN_DELETED_LABEL is used to mark what used to be a 'code_label',
> but was not used for other purposes than taking its address and was
> transformed to mark that no code jumps to it.  NOTE_INSN_DELETED_LABEL
> is generated only in 3 places:
> 
> 1. When delete_insn sees an unused label which is an explicit label in
> the input source code or its address is taken, it turns the label into
> a NOTE_INSN_DELETED_LABEL note.
> 2. When rtl_tidy_fallthru_edge deletes a tablejump, it turns the
> tablejump into a NOTE_INSN_DELETED_LABEL note.
> 3. ix86_init_large_pic_reg creates a NOTE_INSN_DELETED_LABEL note, .L2,
> to initialize large model PIC register:
> 
> L2:
>   movabsq $_GLOBAL_OFFSET_TABLE_-.L2, %r11
>   leaq.L2(%rip), %rax
>   movabsq $val@GOT, %rdx
>   addq%r11, %rax
> 
> Among them, ENDBR is needed only when the label address is taken.
> rest_of_insert_endbranch has
> 
>   if ((LABEL_P (insn) && LABEL_PRESERVE_P (insn))
>   || (NOTE_P (insn)
>   && NOTE_KIND (insn) == NOTE_INSN_DELETED_LABEL))
> /* TODO.  Check /s bit also.  */
> {
>   cet_eb = gen_nop_endbr ();
>   emit_insn_after (cet_eb, insn);
>   continue;
> }
> 
> For NOTE_INSN_DELETED_LABEL, we should check forced_labels to see
> if its address is taken.  Also ix86_init_large_pic_reg shouldn't set
> LABEL_PRESERVE_P (in_struct) since NOTE_INSN_DELETED_LABEL is sufficient
> to keep the label.
> 
> gcc/
> 
>   PR target/89355
>   * config/i386/i386.c (rest_of_insert_endbranch): Check
>   forced_labels to see if the address of NOTE_INSN_DELETED_LABEL
>   is taken.
>   (ix86_init_large_pic_reg): Don't set LABEL_PRESERVE_P.
> 

Here is the updated patch.  We should check LABEL_PRESERVE_P on
NOTE_INSN_DELETED_LABEL to see if its address is taken.

OK for trunk?

Thanks.

H.J.
---
NOTE_INSN_DELETED_LABEL is used to mark what used to be a 'code_label',
but was not used for other purposes than taking its address and was
transformed to mark that no code jumps to it.  Since LABEL_PRESERVE_P is
true only if the label address was taken, check LABEL_PRESERVE_P before
inserting ENDBR.

2019-02-15  H.J. Lu  
Hongtao Liu  

gcc/

PR target/89355
* config/i386/i386.c (rest_of_insert_endbranch): Check LABEL_PRESERVE_P
to see if the address of NOTE_INSN_DELETED_LABEL is taken.
(ix86_init_large_pic_reg): Don't set LABEL_PRESERVE_P.

gcc/testsuite/

PR target/89355
* gcc.target/i386/cet-label-3.c: New test.
* gcc.target/i386/cet-label-4.c: Likewise.
* gcc.target/i386/cet-label-5.c: Likewise.
---
 gcc/config/i386/i386.c  |  9 
 gcc/testsuite/gcc.target/i386/cet-label-3.c | 23 +
 gcc/testsuite/gcc.target/i386/cet-label-4.c | 12 +++
 gcc/testsuite/gcc.target/i386/cet-label-5.c | 13 
 4 files changed, 52 insertions(+), 5 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/i386/cet-label-3.c
 create mode 100644 gcc/testsuite/gcc.target/i386/cet-label-4.c
 create mode 100644 gcc/testsuite/gcc.target/i386/cet-label-5.c

diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c
index 609273e4fc4..acdc789b834 100644
--- a/gcc/config/i386/i386.c
+++ b/gcc/config/i386/i386.c
@@ -2734,10 +2734,10 @@ rest_of_insert_endbranch (void)
  continue;
}
 
- if ((LABEL_P (insn) && LABEL_PRESERVE_P (insn))
- || (NOTE_P (insn)
- && NOTE_KIND (insn) == NOTE_INSN_DELETED_LABEL))
-   /* TODO.  Check /s bit also.  */
+ if ((LABEL_P (insn)
+  || (NOTE_P (insn)
+  && NOTE_KIND (insn) == NOTE_INSN_DELETED_LABEL))
+ && LABEL_PRESERVE_P (insn))
{
  cet_eb = gen_nop_endbr ();
  emit_insn_after (cet_eb, insn);
@@ -6997,7 +6997,6 @@ ix86_init_large_pic_reg (unsigned int tmp_regno)
   gcc_assert (Pmode == DImode);
   label = gen_label_rtx ();
   emit_label (label);
-  LABEL_PRESERVE_P (label) = 1;
   tmp_reg = gen_rtx_REG (Pmode, tmp_regno);
   gcc_assert (REGNO (pic_offset_table_rtx) != tmp_regno);
   emit_insn (gen_set_rip_rex64 (pic_offset_table_rtx,
diff --git a/gcc/testsuite/gcc.target/i386/cet-label-3.c 
b/gcc/testsuite/gcc.target/i386/cet-label-3.c
new file mode 100644
index 000..9f427a866f3
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/cet-label-3.c
@@ -0,0 +1,23 @@
+/* PR target/89355  */
+/* { dg-do compile } */
+/* { dg-options "-O2 -fcf-protection" } */
+/* { dg-final { scan-assembler-times "endbr32" 1 { target ia32 } } } */
+/* { dg-final { scan-assembler-times "endbr64" 1 { target { ! ia32 } } } } */
+int
+test (int* val)
+{
+  int status = 99;
+
+  if (!val)
+{
+  status = 22;
+  goto end;
+}
+
+  extern int x;
+  *val = x;
+
+  status = 0;
+end:
+  r

[PATCH] i386: Add ssse3_pmulhrswv4hi3 expander

2019-02-16 Thread H.J. Lu
There is no V4HI pmulhrsw in AVX512BW and V4HI/V8HI pmulhrsw don't require
AVX2.  But ssse3_pmulhrswv4hi3 requires MMX.

PR target/89372
* config/i386/sse.md (ssedoublemode): Remove V4HI.
(PMULHRSW): Likewise.
(_pmulhrsw3): Require TARGET_SSSE3, not
TARGET_AVX2.
(ssse3_pmulhrswv4hi3): New expander.
(*ssse3_pmulhrswv4hi3): Require TARGET_MMX.
---
 gcc/config/i386/sse.md | 30 ++
 1 file changed, 26 insertions(+), 4 deletions(-)

diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md
index 8281fe2d398..839e38c46f0 100644
--- a/gcc/config/i386/sse.md
+++ b/gcc/config/i386/sse.md
@@ -596,7 +596,7 @@
   [(V4SF "V8SF") (V8SF "V16SF") (V16SF "V32SF")
(V2DF "V4DF") (V4DF "V8DF") (V8DF "V16DF")
(V16QI "V16HI") (V32QI "V32HI") (V64QI "V64HI")
-   (V4HI "V4SI") (V8HI "V8SI") (V16HI "V16SI") (V32HI "V32SI")
+   (V8HI "V8SI") (V16HI "V16SI") (V32HI "V32SI")
(V4SI "V4DI") (V8SI "V16SI") (V16SI "V32SI")
(V4DI "V8DI") (V8DI "V16DI")])
 
@@ -15590,7 +15590,7 @@
(set_attr "mode" "DI")])
 
 (define_mode_iterator PMULHRSW
-  [V4HI V8HI (V16HI "TARGET_AVX2")])
+  [V8HI (V16HI "TARGET_AVX2")])
 
 (define_expand "_pmulhrsw3_mask"
   [(set (match_operand:PMULHRSW 0 "register_operand")
@@ -15629,7 +15629,7 @@
(const_int 14))
  (match_dup 3))
(const_int 1]
-  "TARGET_AVX2"
+  "TARGET_SSSE3"
 {
   operands[3] = CONST1_RTX(mode);
   ix86_fixup_binary_operands_no_copy (MULT, mode, operands);
@@ -15662,6 +15662,26 @@
(set_attr "prefix" "orig,maybe_evex,evex")
(set_attr "mode" "")])
 
+(define_expand "ssse3_pmulhrswv4hi3"
+  [(set (match_operand:V4HI 0 "register_operand")
+   (truncate:V4HI
+ (lshiftrt:V4SI
+   (plus:V4SI
+ (lshiftrt:V4SI
+   (mult:V4SI
+ (sign_extend:V4SI
+   (match_operand:V4HI 1 "nonimmediate_operand"))
+ (sign_extend:V4SI
+   (match_operand:V4HI 2 "nonimmediate_operand")))
+   (const_int 14))
+ (match_dup 3))
+   (const_int 1]
+  "TARGET_MMX && TARGET_SSSE3"
+{
+  operands[3] = CONST1_RTX(V4HImode);
+  ix86_fixup_binary_operands_no_copy (MULT, V4HImode, operands);
+})
+
 (define_insn "*ssse3_pmulhrswv4hi3"
   [(set (match_operand:V4HI 0 "register_operand" "=y")
(truncate:V4HI
@@ -15676,7 +15696,9 @@
(const_int 14))
  (match_operand:V4HI 3 "const1_operand"))
(const_int 1]
-  "TARGET_SSSE3 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
+  "TARGET_MMX
+   && TARGET_SSSE3
+   && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
   "pmulhrsw\t{%2, %0|%0, %2}"
   [(set_attr "type" "sseimul")
(set_attr "prefix_extra" "1")
-- 
2.20.1



[PATCH] i386: Correct *vec_extractv2si_zext_mem

2019-02-16 Thread H.J. Lu
The second and third alternatives in *vec_extractv2si_zext_mem don't
require MMX.  But the second one requires SSE2.

* config/i386/mmx.md (*vec_extractv2si_zext_mem): Doesn't require
MMX.  Add isa attribute.
---
 gcc/config/i386/mmx.md | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/gcc/config/i386/mmx.md b/gcc/config/i386/mmx.md
index c1e0f2c411e..b566cc80020 100644
--- a/gcc/config/i386/mmx.md
+++ b/gcc/config/i386/mmx.md
@@ -1354,13 +1354,14 @@
  (vec_select:SI
(match_operand:V2SI 1 "memory_operand" "o,o,o")
(parallel [(match_operand:SI 2 "const_0_to_1_operand")]]
-  "TARGET_64BIT && TARGET_MMX"
+  "TARGET_64BIT"
   "#"
   "&& reload_completed"
   [(set (match_dup 0) (zero_extend:DI (match_dup 1)))]
 {
   operands[1] = adjust_address (operands[1], SImode, INTVAL (operands[2]) * 4);
-})
+}
+  [(set_attr "isa" "*,sse2,*")])
 
 (define_expand "vec_extractv2sisi"
   [(match_operand:SI 0 "register_operand")
-- 
2.20.1



Re: *ping* [patch, fortran] Fix part of PR 71066

2019-02-16 Thread Janne Blomqvist
On Sat, Feb 16, 2019 at 3:34 PM Thomas Koenig  wrote:

> Am 10.02.19 um 12:13 schrieb Thomas Koenig:
> > Hello world,
> >
> > this patch fixes the coarray part of PR 71066 - handling of data
> > statements for coarrays.  The PR itself is marked as a 7/8/9
> > regression.
> >
> > Regression-tested.  OK for trunk and for backporting?
>
> Ping?
>
> Regards
>
> Thomas
>

Ok.


-- 
Janne Blomqvist


Re: [PATCH] Fix up norm2 simplification (PR middle-end/88074)

2019-02-16 Thread Thomas Koenig

Hi Jakub,

I checked the patch together with Richard's (by which I assume you
mean https://gcc.gnu.org/bugzilla/attachment.cgi?id=45052 ), and
things looked good.

So, the Fortran part of this is OK.

However, we should really also do power-of-two scaling
for the runtime method.

Also, we seem to have a lot of issues with IEEE flags
when calculating NORM2, this would also need to be
addressed.

Regards

Thomas


[libgo] Fix alignment issue in persistent allocator

2019-02-16 Thread Eric Botcazou
This gets rid of a bunch of Go failures on SPARC.

Tested on x86-64/Linux, SPARC/Solaris and SPARC64/Linux.


2019-02-16  Eric Botcazou  

* go/runtime/malloc.go (persistentalloc1): Always align the offset.

-- 
Eric Botcazou

Index: go/runtime/malloc.go
===
--- go/runtime/malloc.go	(revision 268849)
+++ go/runtime/malloc.go	(working copy)
@@ -1269,7 +1269,7 @@ func persistentalloc1(size, align uintpt
 break
 			}
 		}
-		persistent.off = sys.PtrSize
+		persistent.off = round(sys.PtrSize, align)
 	}
 	p := persistent.base.add(persistent.off)
 	persistent.off += size


Re: [RS6000] Fix _ and tf_ splitters

2019-02-16 Thread Segher Boessenkool
Hi Alan,

On Sat, Feb 16, 2019 at 10:40:24PM +1030, Alan Modra wrote:
> This patch fixes a bug that can result in "insn does not satisfy its
> constraints" if these splitters fire due to not getting ctr for the
> jump insn.  Since the jump insn can have any of r,m,d,wi,c,l as the
> decremented count output, it's not sufficient to check for
> gpc_reg_operand (which matches VSX regs for example).  Seen after
> correcting register_move_cost when the cost of gpr <-> vsx is much
> lower.  Since this is a prerequisite to fixing PR89271, I'm mentioning
> that PR in the ChangeLog.
> 
> The tf_ split had a further bug in that it wouldn't match
> if the count output was m,d,wi, or l.
> 
> Bootstrapped etc. powerpc64le-linux.  OK?

Okay, thanks!  For trunk as well as backports.


Segher


[PATCH] Fix s390 backend with old binutils (PR target/89361)

2019-02-16 Thread Jakub Jelinek
Hi!

If S390_USE_TARGET_ATTRIBUTE is 0 (e.g. because of configuring against old
binutils or even with no binutils at all), then indirect jumps are emitted
unconditionally, no matter what is selected on the command line, including
the default options.  The problem is that s390_indirect_branch_settings
is never called and only that function sets the flags the *.md macros test.

Fixed thusly, bootstrapped/regtested on s390x-linux (with recent binutils)
and tested on x86_64-linux -> s390x-linux cross (without any binutils).
Ok for trunk and after a while for release branches?

2019-02-16  Jakub Jelinek  

PR target/89361
* config/s390/s390.c (s390_indirect_branch_attrvalue,
s390_indirect_branch_settings): Define unconditionally.
(s390_set_current_function): Likewise, but guard the whole body except
the s390_indirect_branch_settings call with
#if S390_USE_TARGET_ATTRIBUTE.
(TARGET_SET_CURRENT_FUNCTION): Redefine unconditionally.

--- gcc/config/s390/s390.c.jj   2019-02-12 21:48:52.944076465 +0100
+++ gcc/config/s390/s390.c  2019-02-15 14:16:46.053206087 +0100
@@ -15462,6 +15462,7 @@ s390_can_inline_p (tree caller, tree cal
 
   return ret;
 }
+#endif
 
 /* Set VAL to correct enum value according to the indirect-branch or
function-return attribute in ATTR.  */
@@ -15535,6 +15536,7 @@ s390_indirect_branch_settings (tree fnde
 s390_indirect_branch_attrvalue (attr, &cfun->machine->function_return_mem);
 }
 
+#if S390_USE_TARGET_ATTRIBUTE
 /* Restore targets globals from NEW_TREE and invalidate s390_previous_fndecl
cache.  */
 
@@ -15550,6 +15552,7 @@ s390_activate_target_options (tree new_t
 TREE_TARGET_GLOBALS (new_tree) = save_target_globals_default_opts ();
   s390_previous_fndecl = NULL_TREE;
 }
+#endif
 
 /* Establish appropriate back-end context for processing the function
FNDECL.  The argument might be NULL to indicate processing at top
@@ -15557,6 +15560,7 @@ s390_activate_target_options (tree new_t
 static void
 s390_set_current_function (tree fndecl)
 {
+#if S390_USE_TARGET_ATTRIBUTE
   /* Only change the context if the function changes.  This hook is called
  several times in the course of compiling a function, and we don't want to
  slow things down too much or call target_reinit when it isn't safe.  */
@@ -15588,10 +15592,9 @@ s390_set_current_function (tree fndecl)
   if (old_tree != new_tree)
 s390_activate_target_options (new_tree);
   s390_previous_fndecl = fndecl;
-
+#endif
   s390_indirect_branch_settings (fndecl);
 }
-#endif
 
 /* Implement TARGET_USE_BY_PIECES_INFRASTRUCTURE_P.  */
 
@@ -16331,10 +16334,10 @@ s390_case_values_threshold (void)
 #undef TARGET_ASM_FILE_END
 #define TARGET_ASM_FILE_END s390_asm_file_end
 
-#if S390_USE_TARGET_ATTRIBUTE
 #undef TARGET_SET_CURRENT_FUNCTION
 #define TARGET_SET_CURRENT_FUNCTION s390_set_current_function
 
+#if S390_USE_TARGET_ATTRIBUTE
 #undef TARGET_OPTION_VALID_ATTRIBUTE_P
 #define TARGET_OPTION_VALID_ATTRIBUTE_P s390_valid_target_attribute_p
 

Jakub


[PATCH] s390.md fixes for 32-bit host

2019-02-16 Thread Jakub Jelinek
Hi!

While looking into PR89369, I've noticed various spots in s390.md
using 1ul or 1UL which might not work properly if the host is e.g. ilp32,
and even instead of using ULL constants it is better to use
HOST_WIDE_INT_* macros for HOST_WIDE_INT contexts.

Bootstrapped/regtested on s390x-linux, ok for trunk?

2019-02-16  Jakub Jelinek  

* config/s390/s390.md (*_ior_and_sr_ze,
*__ior_and_lshiftrt, *_sidi_ior_and_lshiftrt):
Use HOST_WIDE_INT_M1U instead of ~(0ULL).
(*_and_subregdi_rotr, *_and_subregdi_rotl): Use
HOST_WIDE_INT_1U instead of 1ULL.
(*pre_z10_extzv, *pre_z10_extv): Change mask type from int
to unsigned HOST_WIDE_INT, use HOST_WIDE_INT_1U instead of 1ul.
(*insv_appendbitsleft,
z = (x << c) | (y >> d) splitters): Use HOST_WIDE_INT_1U
instead of 1UL.
(*insv_mem_reg, *insvdi_mem_reghigh): Use HOST_WIDE_INT_1U
instead of 1ul.

--- gcc/config/s390/s390.md.jj  2019-02-15 18:54:35.037131906 +0100
+++ gcc/config/s390/s390.md 2019-02-15 19:19:02.201945111 +0100
@@ -3917,7 +3917,7 @@ (define_insn "*_ior_and_sr_ze"
 4)))]
   "
&& EXTRACT_ARGS_IN_RANGE (INTVAL (operands[4]), INTVAL (operands[5]), 64)
-   && UINTVAL (operands[2]) == (~(0ULL) << UINTVAL (operands[4]))"
+   && UINTVAL (operands[2]) == (HOST_WIDE_INT_M1U << UINTVAL (operands[4]))"
   "\t%0,%3,64-%4,63,%4+%5"
   [(set_attr "op_type" "RIE")
(set_attr "z10prop" "z10_super_E1")])
@@ -3943,7 +3943,8 @@ (define_insn "*_and_subregdi_ro
 (match_operand:SINT 2 "const_int_operand" "")) 0)
(match_operand:DI 3 "contiguous_bitmask_operand" "")))]
   "
-   && UINTVAL (operands[3]) < (1ULL << (UINTVAL (operands[2]) & 0x3f))"
+   && (UINTVAL (operands[3])
+   < (HOST_WIDE_INT_1U << (UINTVAL (operands[2]) & 0x3f)))"
   "\t%0,%1,%s3,128+%e3,%2" ; dst, src, start, end, shift
   [(set_attr "op_type" "RIE")
(set_attr "z10prop" "z10_super_E1")])
@@ -3955,7 +3956,8 @@ (define_insn "*_and_subregdi_ro
 (match_operand:SINT 2 "const_int_operand" "")) 0)
(match_operand:DI 3 "contiguous_bitmask_operand" "")))]
   "
-   && !(UINTVAL (operands[3]) & ((1ULL << (UINTVAL (operands[2]) & 0x3f)) - 
1))"
+   && !(UINTVAL (operands[3])
+   & ((HOST_WIDE_INT_1U << (UINTVAL (operands[2]) & 0x3f)) - 1))"
   "\t%0,%1,%s3,128+%e3,%2" ; dst, src, start, end, shift
   [(set_attr "op_type" "RIE")
(set_attr "z10prop" "z10_super_E1")])
@@ -3986,7 +3988,8 @@ (define_insn_and_split "*pre_z10_extzv
-   && UINTVAL (operands[2]) == (1UL << UINTVAL (operands[4])) - 1"
+   && UINTVAL (operands[2]) == (HOST_WIDE_INT_1U << UINTVAL (operands[4])) - 1"
   "\t%0,%3,,64-%4-1,%4"
   [(set_attr "op_type" "RIE")
(set_attr "z10prop" "z10_super_E1")])
@@ -4131,7 +4135,8 @@ (define_insn "*__ior_and_
  (match_operand:GPR 3 "register_operand" "d")
  (match_operand:GPR 4 "nonzero_shift_count_operand" ""]
   " && UINTVAL (operands[2])
-   == (~(0ULL) << (GET_MODE_BITSIZE (mode) - UINTVAL (operands[4])))"
+   == (HOST_WIDE_INT_M1U
+   << (GET_MODE_BITSIZE (mode) - UINTVAL (operands[4])))"
   "\t%0,%3,%4,63,64-%4"
   [(set_attr "op_type" "RIE")
(set_attr "z10prop" "z10_super_E1")])
@@ -4147,7 +4152,7 @@ (define_insn "*_sidi_ior_and_ls
  (match_operand:DI 3 "register_operand" "d")
  (match_operand:DI 4 "nonzero_shift_count_operand" "")) 4)))]
   "
-   && UINTVAL (operands[2]) == ~(~(0ULL) >> UINTVAL (operands[4]))"
+   && UINTVAL (operands[2]) == ~(HOST_WIDE_INT_M1U >> UINTVAL (operands[4]))"
   "\t%0,%3,%4,63,64-%4"
   [(set_attr "op_type" "RIE")
(set_attr "z10prop" "z10_super_E1")])
@@ -4182,7 +4187,7 @@ (define_split
(ior:GPR (and:GPR (match_dup 6) (match_dup 5))
 (ashift:GPR (match_dup 3) (match_dup 4]
 {
-  operands[5] = GEN_INT ((1UL << UINTVAL (operands[4])) - 1);
+  operands[5] = GEN_INT ((HOST_WIDE_INT_1U << UINTVAL (operands[4])) - 1);
   if (reg_overlap_mentioned_p (operands[0], operands[3]))
 {
   if (!can_create_pseudo_p ())
@@ -4210,7 +4215,7 @@ (define_split
   (ashift:GPR (match_dup 3) (match_dup 4
  (clobber (reg:CC CC_REGNUM))])]
 {
-  operands[5] = GEN_INT ((1UL << UINTVAL (operands[4])) - 1);
+  operands[5] = GEN_INT ((HOST_WIDE_INT_1U << UINTVAL (operands[4])) - 1);
   if (reg_overlap_mentioned_p (operands[0], operands[3]))
 {
   if (!can_create_pseudo_p ())
@@ -4412,7 +4417,7 @@ (define_insn "*insv_mem_reg"
 {
 int size = INTVAL (operands[1]) / BITS_PER_UNIT;
 
-operands[1] = GEN_INT ((1ul << size) - 1);
+operands[1] = GEN_INT ((HOST_WIDE_INT_1U << size) - 1);
 return (which_alternative == 0) ? "stcm\t%2,%1,%S0"
: "stcmy\t%2,%1,%S0";
 }
@@ -4434,7 +4439,7 @@ (define_insn "*insvdi_mem_reghigh"
 {
 int size = INTVAL (operands[1]) / BITS_PER_UNIT;
 
-operands[1] = G

Re: [PATCH] Fix up norm2 simplification (PR middle-end/88074)

2019-02-16 Thread Steve Kargl
On Sat, Feb 16, 2019 at 05:23:58PM +0100, Thomas Koenig wrote:
> 
> Also, we seem to have a lot of issues with IEEE flags
> when calculating NORM2, this would also need to be
> addressed.
> 

Which IEEE flags, and are you referring to using the
Fortran modules or -ffpe-trap?

-- 
Steve


[PATCH] Fix *rsbg_sidi_srl pattern (PR target/89369)

2019-02-16 Thread Jakub Jelinek
Hi!

The following patch fixes wrong-code on the following testcase extracted
from pseudo-RNG with e.g. -march=zEC12 -O2.
The problem is in the instruction emitted by the *rsbg_sidi_srl
patterns.  We have in *.final correct:
(insn 67 65 68 (parallel [
(set (reg:SI 1 %r1 [189])
(xor:SI (subreg:SI (zero_extract:DI (reg/v:DI 11 %r11 [orig:89 
th ] [89])
(const_int 32 [0x20])
(const_int 24 [0x18])) 4)
(reg:SI 1 %r1 [187])))
(clobber (reg:CC 33 %cc))
]) "pr89369.c":44:73 1419 {*rxsbg_sidi_srl}
 (expr_list:REG_DEAD (reg/v:DI 11 %r11 [orig:89 th ] [89])
(expr_list:REG_UNUSED (reg:CC 33 %cc)
(nil
which is effectively (reg:SI %r1) ^= (unsigned) ((reg:DI %r11) >> 8).
But the pattern emits rxsbg %r1,%r11,40,63,56 which is effectively
(reg:SI %r1) ^= ((unsigned) ((reg:DI %r11) >> 8) & 0xffffff)
or equivalently
(reg:SI %r1) ^= ((reg:SI %r11) >> 8).  Even in the pattern one can see
that it wants to extract exactly 32 bits always, no matter what the shift
count is.  Fixed by always emitting 32,63,(32+pos from zero extract).
On that pr89369.c testcase, the patch also changes
-   rxsbg   %r12,%r9,64,63,32
-   rxsbg   %r12,%r1,64,63,32
+   rxsbg   %r12,%r9,32,63,32
+   rxsbg   %r12,%r1,32,63,32
and
-   rxsbg   %r1,%r3,64,63,32
+   rxsbg   %r1,%r3,32,63,32
which are all with zero_extract with len 32 and pos 0, so again, it wants
to extract the low 32 bits.  I3 64 larger than I4 63 is just weird.
The patch also changes the instructions emitted in rXsbg_mode_sXl.c:
-   rosbg   %r2,%r3,34,63,62
+   rosbg   %r2,%r3,32,63,62
and
-   rxsbg   %r2,%r3,34,63,62
+   rxsbg   %r2,%r3,32,63,62
Here, it is
__attribute__ ((noinline)) unsigned int
rosbg_si_srl (unsigned int a, unsigned int b)
{
  return a | (b >> 2);
}
__attribute__ ((noinline)) unsigned int
rxsbg_si_srl (unsigned int a, unsigned int b)
{
  return a ^ (b >> 2);
}
so from quick POV, one might think 34,63,62 is better, as we want to or in
just the 30 bits from b.  Both should actually work the same though, because
(subreg/s/v:SI (reg/v:DI 64 [ b+-4 ]) 4) - the b argument is passed zero
extended and so it doesn't really matter how many bits we extract, as long
as it is 30 or more.  If I try instead:
__attribute__ ((noinline, noipa)) unsigned int
rosbg_si_srl (unsigned int a, unsigned long long b)
{
  return a | ((unsigned) b >> 2);
}
__attribute__ ((noinline, noipa)) unsigned int
rxsbg_si_srl (unsigned int a, unsigned long long b)
{
  return a ^ ((unsigned) b >> 2);
}
then both the unpatched and patched compiler emit properly
rosbg   %r2,%r3,34,63,62
and
rxsbg   %r2,%r3,34,63,62
through a different pattern, because in that case we must not rely on the
upper 32 bits of b being zero.

In addition to this change, the patch adds a cleanup, there is no reason to
use a static buffer in each instruction and increase global state, we can
just tweak the arguments and let the caller deal with it.  That is something
normally done in other parts of the s390.md as well.  As small CONST_INTs
are hashed, it shouldn't increase compile time memory.

Bootstrapped/regtested on s390x-linux, ok for trunk?

2019-02-16  Jakub Jelinek  

PR target/89369
* config/s390/s390.md (*rsbg__srl_bitmask,
*rsbg__sll, *rsbg__srl): Don't construct
pattern in a temporary buffer.
(*rsbg_sidi_srl): Likewise.  Always use 32 as I3 rather
than 64-operands[2].

* gcc.c-torture/execute/pr89369.c: New test.
* gcc.target/s390/md/rXsbg_mode_sXl.c (rosbg_si_srl,
rxsbg_si_srl): Expect last 3 operands 32,63,62 rather than
34,63,62.

--- gcc/config/s390/s390.md.jj  2019-02-05 22:59:04.883503954 +0100
+++ gcc/config/s390/s390.md 2019-02-15 18:54:35.037131906 +0100
@@ -4263,10 +4263,8 @@ (define_insn "*rsbg__srl_bit
&& s390_extzv_shift_ok (, 64 - INTVAL (operands[3]),
INTVAL (operands[2]))"
   {
-static char buffer[256];
-sprintf (buffer, "rsbg\t%%0,%%1,%%2,%%2,%ld",
- 64 - INTVAL (operands[3]));
-return buffer;
+operands[3] = GEN_INT (64 - INTVAL (operands[3]));
+return "rsbg\t%0,%1,%2,%2,%3";
   }
   [(set_attr "op_type" "RIE")])
 
@@ -4301,10 +4299,8 @@ (define_insn "*rsbg__sll"
(clobber (reg:CC CC_REGNUM))]
   "TARGET_Z10"
   {
-static char buffer[256];
-sprintf (buffer, "rsbg\t%%0,%%1,,%ld,%%2",
- 63 - INTVAL (operands[2]));
-return buffer;
+operands[3] = GEN_INT (63 - INTVAL (operands[2]));
+return "rsbg\t%0,%1,,%3,%2";
   }
   [(set_attr "op_type" "RIE")])
 
@@ -4322,10 +4318,9 @@ (define_insn "*rsbg__srl"
(clobber (reg:CC CC_REGNUM))]
   "TARGET_Z10"
   {
-static char buffer[256];
-sprintf (buffer, "rsbg\t%%0,%%1,%ld,63,%ld",
-  INTVAL (operands[2]), 64 - INTVAL (operands[2]));
-return buffer;
+operan

Re: [C++ PATCH] Fix cxx_eval_store_expression (PR c++/89336)

2019-02-16 Thread Jason Merrill

On 2/13/19 6:02 PM, Jakub Jelinek wrote:

As the following testcases show, cxx_eval_store_expression mishandles the
case when constexpr evaluation of the rhs (init) modifies part of the ctor
that the store stores into.
Except for unions (see below) I believe it is fine the way the outer refs
are handled, because we advance into another CONSTRUCTOR and if the pointer
to that is reallocated or memmoved somewhere else, it shouldn't matter.
For the last CONSTRUCTOR, we set valp to
&CONSTRUCTOR_ELT (*valp, something)->value
but CONSTRUCTOR_ELTS is not a linked list, but vector, which can be
reallocated if we need more elements, or vec_safe_insert some earlier
element memmoves further elts later in the same vector.

The likely case is still that nothing has changed in between, so this patch
just quickly verifies if that is the case (by comparing
CONSTRUCTOR_ELT (ctor, 0) with the previously saved value of that and by
checking if at the spot in the vector is the expected index).  If that is
the case, it doesn't do anything else, otherwise it updates the valp
pointer.


For scalar types, as in all your testcases, we can evaluate the 
initializer before the target, as C++17 wants.  We probably still need 
your patch for when type is a class.



Note, at least by my reading of the standard, the union case seems to be
mishandled (and the patch doesn't change anything on that).  The union
member being stored should IMHO become active after evaluating both the
lhs and rhs, but before the actual store, while the current code
invalidates the previously active member already before evaluating the rhs
(if it is different from the upcoming one).  I think
constexpr int foo () {
   union U { int a; long b; };
   union V { union U u; short v; };
   V w {};
   w.u.a = w.v = w.u.b = 5L;
   return w.u.a;
}
static_assert (foo () == 5, "");
should be valid (though clang++ rejects it as well).  If it is indeed valid,
it is not going to be very easy to implement properly


This testcase will also work if we evaluate scalar init first.  The 
difficult case is when type is a class, something like


struct A { int i; constexpr A(): i(42) { } };
struct B { A a; };
struct C { A a; };
union U { B b; C c; constexpr U(): b() {} };

constexpr int f()
{
  U u;
  u.c.a = u.b.a;
  return u.c.a.i;
}

static_assert (f() == 42);

But actually, we should be able to evaluate init first in class 
assignment cases as well, making this also work; we only need to have 
the target available first for initialization, in case the constructor 
refers to the object by another path.  We likely still need your patch 
for that case, though I'm having trouble coming up with a testcase.


commit b7cf563e1715909221f783747bb5e194dd38d803
Author: Jason Merrill 
Date:   Fri Feb 15 13:09:33 2019 -1000

preeval

diff --git a/gcc/cp/constexpr.c b/gcc/cp/constexpr.c
index 923763faa0a..ea1e999620e 100644
--- a/gcc/cp/constexpr.c
+++ b/gcc/cp/constexpr.c
@@ -3634,6 +3634,16 @@ cxx_eval_store_expression (const constexpr_ctx *ctx, 
tree t,
   maybe_simplify_trivial_copy (target, init);
 
   tree type = TREE_TYPE (target);
+  bool preeval = SCALAR_TYPE_P (type) || TREE_CODE (t) == MODIFY_EXPR;
+  if (preeval)
+{
+  /* Evaluate the value to be stored without knowing what object it will be
+stored in, so that any side-effects happen first.  */
+  if (!SCALAR_TYPE_P (type))
+   new_ctx.ctor = new_ctx.object = NULL_TREE;
+  init = cxx_eval_constant_expression (&new_ctx, init, false,
+  non_constant_p, overflow_p);
+}
   target = cxx_eval_constant_expression (ctx, target,
 true,
 non_constant_p, overflow_p);
@@ -3849,8 +3859,9 @@ cxx_eval_store_expression (const constexpr_ctx *ctx, tree 
t,
   new_ctx.object = target;
 }
 
-  init = cxx_eval_constant_expression (&new_ctx, init, false,
-  non_constant_p, overflow_p);
+  if (!preeval)
+init = cxx_eval_constant_expression (&new_ctx, init, false,
+non_constant_p, overflow_p);
   /* Don't share a CONSTRUCTOR that might be changed later.  */
   init = unshare_constructor (init);
   if (target == object)


Re: [PATCH 27/42] i386: Make _mm_empty () as NOP when MMX is disabled

2019-02-16 Thread Uros Bizjak
On 2/16/19, H.J. Lu  wrote:
> On Sat, Feb 16, 2019 at 12:58 AM Uros Bizjak  wrote:
>>
>> On 2/16/19, H.J. Lu  wrote:
>> > With SSE emulation of MMX intrinsics, we should make _mm_empty () as
>> > NOP
>> > when MMX is disabled.
>> >
>> >   PR target/89021
>> >   * config/i386/mmx.md (mmx_): Renamed to ...
>> >   (mmx__1): This.
>> >   (mmx_): New expander.
>> > ---
>> >  gcc/config/i386/mmx.md | 29 -
>> >  1 file changed, 28 insertions(+), 1 deletion(-)
>> >
>> > diff --git a/gcc/config/i386/mmx.md b/gcc/config/i386/mmx.md
>> > index 9cf0251293a..0f925c0b1ea 100644
>> > --- a/gcc/config/i386/mmx.md
>> > +++ b/gcc/config/i386/mmx.md
>> > @@ -1848,7 +1848,34 @@
>> >[(UNSPECV_EMMS "emms")
>> > (UNSPECV_FEMMS "femms")])
>> >
>> > -(define_insn "mmx_"
>> > +(define_expand "mmx_"
>> > +  [(unspec_volatile [(const_int 0)] EMMS)
>> > +   (clobber (reg:XF ST0_REG))
>> > +   (clobber (reg:XF ST1_REG))
>> > +   (clobber (reg:XF ST2_REG))
>> > +   (clobber (reg:XF ST3_REG))
>> > +   (clobber (reg:XF ST4_REG))
>> > +   (clobber (reg:XF ST5_REG))
>> > +   (clobber (reg:XF ST6_REG))
>> > +   (clobber (reg:XF ST7_REG))
>> > +   (clobber (reg:DI MM0_REG))
>> > +   (clobber (reg:DI MM1_REG))
>> > +   (clobber (reg:DI MM2_REG))
>> > +   (clobber (reg:DI MM3_REG))
>> > +   (clobber (reg:DI MM4_REG))
>> > +   (clobber (reg:DI MM5_REG))
>> > +   (clobber (reg:DI MM6_REG))
>> > +   (clobber (reg:DI MM7_REG))]
>> > +  "TARGET_MMX || TARGET_MMX_WITH_SSE"
>> > +{
>> > +   if (TARGET_MMX)
>> > + emit_insn (gen_mmx__1 ());
>> > +   else
>> > + emit_insn (gen_nop ());
>> > +   DONE;
>>
>> The above should be written as:
>>
>> if (!TARGET_MMX)
>>   {
>> emit_insn (gen_nop ()));
>> DONE;
>>   }
>>
>> > +})
>> > +
>> > +(define_insn "mmx__1"
>>
>> The old insn should be renamed to "*mmx_".
>>
>> Uros.
>
> Tried and got

You have to wrap the pattern in a parallel in the expander.

Uros.

>
> [hjl@gnu-cfl-2 gcc]$ cat x.c
> #include 
>
> void
> foo (void)
> {
>   _mm_empty ();
> }
> [hjl@gnu-cfl-2 gcc]$ ./xgcc -B./ -S x.c -da
> x.c: In function ‘foo’:
> x.c:7:1: error: unrecognizable insn:
> 7 | }
>   | ^
> (insn 5 2 6 2 (unspec_volatile [
> (const_int 0 [0])
> ] UNSPECV_EMMS) "./include/mmintrin.h":60:3 -1
>  (nil))
> during RTL pass: vregs
> dump file: x.c.234r.vregs
> x.c:7:1: internal compiler error: in extract_insn, at recog.c:2310
> 0x10ad84d _fatal_insn(char const*, rtx_def const*, char const*, int,
> char const*)
> /export/gnu/import/git/gitlab/x86-gcc/gcc/rtl-error.c:108
> 0x10ad88e _fatal_insn_not_found(rtx_def const*, char const*, int, char
> const*)
> /export/gnu/import/git/gitlab/x86-gcc/gcc/rtl-error.c:116
> 0x1042abb extract_insn(rtx_insn*)
> /export/gnu/import/git/gitlab/x86-gcc/gcc/recog.c:2310
> 0xc95912 instantiate_virtual_regs_in_insn
> /export/gnu/import/git/gitlab/x86-gcc/gcc/function.c:1654
> 0xc96d44 instantiate_virtual_regs
> /export/gnu/import/git/gitlab/x86-gcc/gcc/function.c:1975
> 0xc96e0e execute
> /export/gnu/import/git/gitlab/x86-gcc/gcc/function.c:2024
> Please submit a full bug report,
> with preprocessed source if appropriate.
> Please include the complete backtrace with any bug report.
> See  for instructions.
> [hjl@gnu-cfl-2 gcc]$
>
> ;;
> ;; Full RTL generated for this function:
> ;;
> (note 1 0 3 NOTE_INSN_DELETED)
> ;; basic block 2, loop depth 0, maybe hot
> ;;  prev block 0, next block 1, flags: (NEW, REACHABLE, RTL)
> ;;  pred:   ENTRY (FALLTHRU)
> (note 3 1 2 2 [bb 2] NOTE_INSN_BASIC_BLOCK)
> (note 2 3 5 2 NOTE_INSN_FUNCTION_BEG)
> (insn 5 2 6 2 (unspec_volatile [
> (const_int 0 [0])
> ] UNSPECV_EMMS) "./include/mmintrin.h":60:3 -1
>  (nil))
> (insn 6 5 7 2 (clobber (reg:XF 8 st)) "./include/mmintrin.h":60:3 -1
>  (nil))
> (insn 7 6 8 2 (clobber (reg:XF 9 st(1))) "./include/mmintrin.h":60:3 -1
>  (nil))
> (insn 8 7 9 2 (clobber (reg:XF 10 st(2))) "./include/mmintrin.h":60:3 -1
>  (nil))
> (insn 9 8 10 2 (clobber (reg:XF 11 st(3))) "./include/mmintrin.h":60:3 -1
>  (nil))
> (insn 10 9 11 2 (clobber (reg:XF 12 st(4))) "./include/mmintrin.h":60:3 -1
>  (nil))
> (insn 11 10 12 2 (clobber (reg:XF 13 st(5))) "./include/mmintrin.h":60:3 -1
>  (nil))
> (insn 12 11 13 2 (clobber (reg:XF 14 st(6))) "./include/mmintrin.h":60:3 -1
>  (nil))
> (insn 13 12 14 2 (clobber (reg:XF 15 st(7))) "./include/mmintrin.h":60:3 -1
>  (nil))
> (insn 14 13 15 2 (clobber (reg:DI 28 mm0)) "./include/mmintrin.h":60:3 -1
>  (nil))
> (insn 15 14 16 2 (clobber (reg:DI 29 mm1)) "./include/mmintrin.h":60:3 -1
>  (nil))
> (insn 16 15 17 2 (clobber (reg:DI 30 mm2)) "./include/mmintrin.h":60:3 -1
>  (nil))
> (insn 17 16 18 2 (clobber (reg:DI 31 mm3)) "./include/mmintrin.h":60:3 -1
>  (nil))
> (insn 18 17 19 2 (clobber (reg:DI 32 mm4)) "./include/mmintrin.h":60:3 -1
>  (nil))
> (insn 19 18 20 2 (clobber (reg:DI 33 mm5)) "./include/mm

Re: [PATCH] i386: Correct *vec_extractv2si_zext_mem

2019-02-16 Thread Uros Bizjak
On 2/16/19, H.J. Lu  wrote:
> The second and third alternatives in *vec_extractv2si_zext_mem don't
> require MMX.  But the second one requires SSE2.
>
>   * config/i386/mmx.md (*vec_extractv2si_zext_mem): Doesn't require
>   MMX.  Add isa attribute.

OK.

Thanks,
Uros.

> ---
>  gcc/config/i386/mmx.md | 5 +++--
>  1 file changed, 3 insertions(+), 2 deletions(-)
>
> diff --git a/gcc/config/i386/mmx.md b/gcc/config/i386/mmx.md
> index c1e0f2c411e..b566cc80020 100644
> --- a/gcc/config/i386/mmx.md
> +++ b/gcc/config/i386/mmx.md
> @@ -1354,13 +1354,14 @@
> (vec_select:SI
>   (match_operand:V2SI 1 "memory_operand" "o,o,o")
>   (parallel [(match_operand:SI 2 "const_0_to_1_operand")]]
> -  "TARGET_64BIT && TARGET_MMX"
> +  "TARGET_64BIT"
>"#"
>"&& reload_completed"
>[(set (match_dup 0) (zero_extend:DI (match_dup 1)))]
>  {
>operands[1] = adjust_address (operands[1], SImode, INTVAL (operands[2]) *
> 4);
> -})
> +}
> +  [(set_attr "isa" "*,sse2,*")])
>
>  (define_expand "vec_extractv2sisi"
>[(match_operand:SI 0 "register_operand")
> --
> 2.20.1
>
>


New French PO file for 'gcc' (version 9.1-b20190203)

2019-02-16 Thread Translation Project Robot
Hello, gentle maintainer.

This is a message from the Translation Project robot.

A revised PO file for textual domain 'gcc' has been submitted
by the French team of translators.  The file is available at:

https://translationproject.org/latest/gcc/fr.po

(This file, 'gcc-9.1-b20190203.fr.po', has just now been sent to you in
a separate email.)

All other PO files for your package are available in:

https://translationproject.org/latest/gcc/

Please consider including all of these in your next release, whether
official or a pretest.

Whenever you have a new distribution with a new version number ready,
containing a newer POT file, please send the URL of that distribution
tarball to the address below.  The tarball may be just a pretest or a
snapshot, it does not even have to compile.  It is just used by the
translators when they need some extra translation context.

The following HTML page has been updated:

https://translationproject.org/domain/gcc.html

If any question arises, please contact the translation coordinator.

Thank you for all your work,

The Translation Project robot, in the
name of your translation coordinator.




C++ PATCH to fix eb82.C

2019-02-16 Thread Marek Polacek
I noticed this test fails in c++2a since the implementation of P0846
landed in r265734.  Since it's in g++.old-deja/, I never noticed the
fail (but I don't see any others).  This patch tweaks a dg-error in
order to make it pass in c++2a also.

Tested on x86_64-linux, ok for trunk?

2019-02-16  Marek Polacek  

* g++.old-deja/g++.robertl/eb82.C: Tweak dg-error.

diff --git gcc/testsuite/g++.old-deja/g++.robertl/eb82.C 
gcc/testsuite/g++.old-deja/g++.robertl/eb82.C
index 9bf0398cd0a..fc2bf7866fe 100644
--- gcc/testsuite/g++.old-deja/g++.robertl/eb82.C
+++ gcc/testsuite/g++.old-deja/g++.robertl/eb82.C
@@ -9,5 +9,5 @@ double val  () // { dg-error "" } bogus code
 
 int main ()
 {
-   printf ("%d\n", val<(int)3> ()); // { dg-error "" } val undeclared
+   printf ("%d\n", val<(int)3> ()); // { dg-error "" "" { target c++17_down } 
} val undeclared
 }


Re: libgo patch committed: Add S/390 support to internal/cpu package

2019-02-16 Thread Jakub Jelinek
On Sat, Feb 16, 2019 at 08:42:11AM +0100, Jakub Jelinek wrote:
> On Fri, Feb 15, 2019 at 08:59:29PM +0100, Matthias Klose wrote:
> > On 15.02.19 15:52, Ian Lance Taylor wrote:
> > > This patch by Robin Dapp adds S/390 support to the internal/cpu
> > > package.  This partially addresses PR 89123.  I bootstrapped it on
> > > x86_64-pc-linux-gnu, which means little.  Committed to mainline.
> > 
> > fails in the -m31 multilib variant with
> 
> Indeed.  Given that there is just
> libgo/go/internal/cpu/cpu_s390x.go
> libgo/go/internal/cpu/cpu_s390x_test.go
> (note, no s390), I think the easiest fix is:

Bootstrapped/regtested on s390x-linux successfully now.

> --- libgo/go/internal/cpu/cpu_gccgo.c.jj  2019-02-16 07:57:27.882179972 
> +0100
> +++ libgo/go/internal/cpu/cpu_gccgo.c 2019-02-16 08:36:37.241900882 +0100
> @@ -71,7 +71,7 @@ struct xgetbv_ret xgetbv(void) {
>  
>  #endif /* defined(__i386__) || defined(__x86_64__)  */
>  
> -#ifdef __s390__
> +#ifdef __s390x__
>  
>  struct facilityList {
>   uint64_t bits[4];
> @@ -184,4 +184,4 @@ struct queryResult klmdQuery() {
>  return ret;
>  }
>  
> -#endif /* defined(__s390__)  */
> +#endif /* defined(__s390x__)  */
> 
> If cpu_s390.go is ever added, this can be changed again and there can be say
> #ifdef __s390x__
> #define LHI "lghi"
> #else
> #define LHI "lhi"
> #endif
> and replace "lghi ... in the inline asm with LHI "...

Jakub


Re: [PATCH] i386: Add ssse3_pmulhrswv4hi3 expander

2019-02-16 Thread Uros Bizjak
On 2/16/19, H.J. Lu  wrote:
> There is no V4HI pmulhrsw in AVX512BW and V4HI/V8HI pmulhrsw don't require
> AVX2.  But ssse3_pmulhrswv4hi3 requires MMX.
>
>   PR target/89372
>   * config/i386/sse.md (ssedoublemode): Remove V4HI.
>   (PMULHRSW): Likewise.
>   (_pmulhrsw3): Require TARGET_SSSE3, not
>   TARGET_AVX2.
>   (ssse3_pmulhrswv4hi3): New expander.
>   (*ssse3_pmulhrswv4hi3): Require TARGET_MMX.
> ---
>  gcc/config/i386/sse.md | 30 ++
>  1 file changed, 26 insertions(+), 4 deletions(-)
>
> diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md
> index 8281fe2d398..839e38c46f0 100644
> --- a/gcc/config/i386/sse.md
> +++ b/gcc/config/i386/sse.md
> @@ -596,7 +596,7 @@
>[(V4SF "V8SF") (V8SF "V16SF") (V16SF "V32SF")
> (V2DF "V4DF") (V4DF "V8DF") (V8DF "V16DF")
> (V16QI "V16HI") (V32QI "V32HI") (V64QI "V64HI")
> -   (V4HI "V4SI") (V8HI "V8SI") (V16HI "V16SI") (V32HI "V32SI")
> +   (V8HI "V8SI") (V16HI "V16SI") (V32HI "V32SI")
> (V4SI "V4DI") (V8SI "V16SI") (V16SI "V32SI")
> (V4DI "V8DI") (V8DI "V16DI")])
>
> @@ -15590,7 +15590,7 @@
> (set_attr "mode" "DI")])
>
>  (define_mode_iterator PMULHRSW
> -  [V4HI V8HI (V16HI "TARGET_AVX2")])
> +  [V8HI (V16HI "TARGET_AVX2")])
>
>  (define_expand "_pmulhrsw3_mask"
>[(set (match_operand:PMULHRSW 0 "register_operand")
> @@ -15629,7 +15629,7 @@
>   (const_int 14))
> (match_dup 3))
>   (const_int 1]
> -  "TARGET_AVX2"
> +  "TARGET_SSSE3"
>  {
>operands[3] = CONST1_RTX(mode);
>ix86_fixup_binary_operands_no_copy (MULT, mode, operands);
> @@ -15662,6 +15662,26 @@
> (set_attr "prefix" "orig,maybe_evex,evex")
> (set_attr "mode" "")])
>
> +(define_expand "ssse3_pmulhrswv4hi3"
> +  [(set (match_operand:V4HI 0 "register_operand")
> + (truncate:V4HI
> +   (lshiftrt:V4SI
> + (plus:V4SI
> +   (lshiftrt:V4SI
> + (mult:V4SI
> +   (sign_extend:V4SI
> + (match_operand:V4HI 1 "nonimmediate_operand"))
> +   (sign_extend:V4SI
> + (match_operand:V4HI 2 "nonimmediate_operand")))
> + (const_int 14))
> +   (match_dup 3))
> + (const_int 1]
> +  "TARGET_MMX && TARGET_SSSE3"

Currently, there is no need for TARGET_MMX constraint on mainline.

> +{
> +  operands[3] = CONST1_RTX(V4HImode);
> +  ix86_fixup_binary_operands_no_copy (MULT, V4HImode, operands);
> +})
> +
>  (define_insn "*ssse3_pmulhrswv4hi3"
>[(set (match_operand:V4HI 0 "register_operand" "=y")
>   (truncate:V4HI
> @@ -15676,7 +15696,9 @@
>   (const_int 14))
> (match_operand:V4HI 3 "const1_operand"))
>   (const_int 1]
> -  "TARGET_SSSE3 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
> +  "TARGET_MMX
> +   && TARGET_SSSE3
> +   && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
>"pmulhrsw\t{%2, %0|%0, %2}"
>[(set_attr "type" "sseimul")
> (set_attr "prefix_extra" "1")

The above hunk is currently not needed.

OK for mainline without TARGET_MMX constraints.

Thanks,
Uros.


Re: [PATCH] i386: Add ssse3_pmulhrswv4hi3 expander

2019-02-16 Thread H.J. Lu
On Sat, Feb 16, 2019 at 10:45:11PM +0100, Uros Bizjak wrote:
> On 2/16/19, H.J. Lu  wrote:
> > There is no V4HI pmulhrsw in AVX512BW and V4HI/V8HI pmulhrsw don't require
> > AVX2.  But ssse3_pmulhrswv4hi3 requires MMX.
> >
> > PR target/89372
> > * config/i386/sse.md (ssedoublemode): Remove V4HI.
> > (PMULHRSW): Likewise.
> > (_pmulhrsw3): Require TARGET_SSSE3, not
> > TARGET_AVX2.
> > (ssse3_pmulhrswv4hi3): New expander.
> > (*ssse3_pmulhrswv4hi3): Require TARGET_MMX.
> > ---
> >  gcc/config/i386/sse.md | 30 ++
> >  1 file changed, 26 insertions(+), 4 deletions(-)
> >
> > diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md
> > index 8281fe2d398..839e38c46f0 100644
> > --- a/gcc/config/i386/sse.md
> > +++ b/gcc/config/i386/sse.md
> > @@ -596,7 +596,7 @@
> >[(V4SF "V8SF") (V8SF "V16SF") (V16SF "V32SF")
> > (V2DF "V4DF") (V4DF "V8DF") (V8DF "V16DF")
> > (V16QI "V16HI") (V32QI "V32HI") (V64QI "V64HI")
> > -   (V4HI "V4SI") (V8HI "V8SI") (V16HI "V16SI") (V32HI "V32SI")
> > +   (V8HI "V8SI") (V16HI "V16SI") (V32HI "V32SI")
> > (V4SI "V4DI") (V8SI "V16SI") (V16SI "V32SI")
> > (V4DI "V8DI") (V8DI "V16DI")])
> >
> > @@ -15590,7 +15590,7 @@
> > (set_attr "mode" "DI")])
> >
> >  (define_mode_iterator PMULHRSW
> > -  [V4HI V8HI (V16HI "TARGET_AVX2")])
> > +  [V8HI (V16HI "TARGET_AVX2")])
> >
> >  (define_expand "_pmulhrsw3_mask"
> >[(set (match_operand:PMULHRSW 0 "register_operand")
> > @@ -15629,7 +15629,7 @@
> > (const_int 14))
> >   (match_dup 3))
> > (const_int 1]
> > -  "TARGET_AVX2"
> > +  "TARGET_SSSE3"
> >  {
> >operands[3] = CONST1_RTX(mode);
> >ix86_fixup_binary_operands_no_copy (MULT, mode, operands);
> > @@ -15662,6 +15662,26 @@
> > (set_attr "prefix" "orig,maybe_evex,evex")
> > (set_attr "mode" "")])
> >
> > +(define_expand "ssse3_pmulhrswv4hi3"
> > +  [(set (match_operand:V4HI 0 "register_operand")
> > +   (truncate:V4HI
> > + (lshiftrt:V4SI
> > +   (plus:V4SI
> > + (lshiftrt:V4SI
> > +   (mult:V4SI
> > + (sign_extend:V4SI
> > +   (match_operand:V4HI 1 "nonimmediate_operand"))
> > + (sign_extend:V4SI
> > +   (match_operand:V4HI 2 "nonimmediate_operand")))
> > +   (const_int 14))
> > + (match_dup 3))
> > +   (const_int 1]
> > +  "TARGET_MMX && TARGET_SSSE3"
> 
> Currently, there is no need for TARGET_MMX constraint on mainline.
> 
> > +{
> > +  operands[3] = CONST1_RTX(V4HImode);
> > +  ix86_fixup_binary_operands_no_copy (MULT, V4HImode, operands);
> > +})
> > +
> >  (define_insn "*ssse3_pmulhrswv4hi3"
> >[(set (match_operand:V4HI 0 "register_operand" "=y")
> > (truncate:V4HI
> > @@ -15676,7 +15696,9 @@
> > (const_int 14))
> >   (match_operand:V4HI 3 "const1_operand"))
> > (const_int 1]
> > -  "TARGET_SSSE3 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
> > +  "TARGET_MMX
> > +   && TARGET_SSSE3
> > +   && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
> >"pmulhrsw\t{%2, %0|%0, %2}"
> >[(set_attr "type" "sseimul")
> > (set_attr "prefix_extra" "1")
> 
> The above hunk is currently not needed.
> 
> OK for mainline without TARGET_MMX constraints.
> 

This is the patch I am checking in.

H.J.
---
There is no V4HI pmulhrsw in AVX512BW and V4HI/V8HI pmulhrsw don't require
AVX2.

PR target/89372
* config/i386/sse.md (ssedoublemode): Remove V4HI.
(PMULHRSW): Likewise.
(_pmulhrsw3): Require TARGET_SSSE3, not
TARGET_AVX2.
(ssse3_pmulhrswv4hi3): New expander.
---
 gcc/config/i386/sse.md | 26 +++---
 1 file changed, 23 insertions(+), 3 deletions(-)

diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md
index 8281fe2d398..8036136752a 100644
--- a/gcc/config/i386/sse.md
+++ b/gcc/config/i386/sse.md
@@ -596,7 +596,7 @@
   [(V4SF "V8SF") (V8SF "V16SF") (V16SF "V32SF")
(V2DF "V4DF") (V4DF "V8DF") (V8DF "V16DF")
(V16QI "V16HI") (V32QI "V32HI") (V64QI "V64HI")
-   (V4HI "V4SI") (V8HI "V8SI") (V16HI "V16SI") (V32HI "V32SI")
+   (V8HI "V8SI") (V16HI "V16SI") (V32HI "V32SI")
(V4SI "V4DI") (V8SI "V16SI") (V16SI "V32SI")
(V4DI "V8DI") (V8DI "V16DI")])
 
@@ -15590,7 +15590,7 @@
(set_attr "mode" "DI")])
 
 (define_mode_iterator PMULHRSW
-  [V4HI V8HI (V16HI "TARGET_AVX2")])
+  [V8HI (V16HI "TARGET_AVX2")])
 
 (define_expand "_pmulhrsw3_mask"
   [(set (match_operand:PMULHRSW 0 "register_operand")
@@ -15629,7 +15629,7 @@
(const_int 14))
  (match_dup 3))
(const_int 1]
-  "TARGET_AVX2"
+  "TARGET_SSSE3"
 {
   operands[3] = CONST1_RTX(mode);
   ix86_fixup_binary_operands_no_copy (MULT, mode, operands);
@@ -15662,6 +15662,26 @@
(set_attr "prefix" "orig,maybe_evex,evex")
(set_attr "mode" "")])
 
+(define_expand "ssse3_pmulhrswv4hi3"
+  [(set (match_operand:V4HI 0 "register_operand")
+   (truncate:V4HI

[PATCH 03/41] i386: Emulate MMX punpcklXX/punpckhXX with SSE punpcklXX

2019-02-16 Thread H.J. Lu
Emulate MMX punpcklXX/punpckhXX with SSE punpcklXX.  For MMX punpckhXX,
move bits 64:127 to bits 0:63 in SSE register.  Only SSE register source
operand is allowed.

PR target/89021
* config/i386/i386-protos.h (ix86_split_mmx_punpck): New
prototype.
* config/i386/i386.c (ix86_split_mmx_punpck): New function.
* config/i386/mmx.md (mmx_punpckhbw): Changed to
define_insn_and_split to support SSE emulation.
(mmx_punpcklbw): Likewise.
(mmx_punpckhwd): Likewise.
(mmx_punpcklwd): Likewise.
(mmx_punpckhdq): Likewise.
(mmx_punpckldq): Likewise.
---
 gcc/config/i386/i386-protos.h |   1 +
 gcc/config/i386/i386.c|  77 +++
 gcc/config/i386/mmx.md| 138 ++
 3 files changed, 168 insertions(+), 48 deletions(-)

diff --git a/gcc/config/i386/i386-protos.h b/gcc/config/i386/i386-protos.h
index a53b48438ec..37581837a32 100644
--- a/gcc/config/i386/i386-protos.h
+++ b/gcc/config/i386/i386-protos.h
@@ -204,6 +204,7 @@ extern rtx ix86_split_stack_guard (void);
 
 extern void ix86_move_vector_high_sse_to_mmx (rtx);
 extern void ix86_split_mmx_pack (rtx[], enum rtx_code);
+extern void ix86_split_mmx_punpck (rtx[], bool);
 
 #ifdef TREE_CODE
 extern void init_cumulative_args (CUMULATIVE_ARGS *, tree, rtx, tree, int);
diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c
index d31b69d9a82..a76c17beece 100644
--- a/gcc/config/i386/i386.c
+++ b/gcc/config/i386/i386.c
@@ -20275,6 +20275,83 @@ ix86_split_mmx_pack (rtx operands[], enum rtx_code 
code)
   ix86_move_vector_high_sse_to_mmx (op0);
 }
 
+/* Split MMX punpcklXX/punpckhXX with SSE punpcklXX.  */
+
+void
+ix86_split_mmx_punpck (rtx operands[], bool high_p)
+{
+  rtx op0 = operands[0];
+  rtx op1 = operands[1];
+  rtx op2 = operands[2];
+  machine_mode mode = GET_MODE (op0);
+  rtx mask;
+  /* The corresponding SSE mode.  */
+  machine_mode sse_mode, double_sse_mode;
+
+  switch (mode)
+{
+case E_V8QImode:
+  sse_mode = V16QImode;
+  double_sse_mode = V32QImode;
+  mask = gen_rtx_PARALLEL (VOIDmode,
+  gen_rtvec (16,
+ GEN_INT (0), GEN_INT (16),
+ GEN_INT (1), GEN_INT (17),
+ GEN_INT (2), GEN_INT (18),
+ GEN_INT (3), GEN_INT (19),
+ GEN_INT (4), GEN_INT (20),
+ GEN_INT (5), GEN_INT (21),
+ GEN_INT (6), GEN_INT (22),
+ GEN_INT (7), GEN_INT (23)));
+  break;
+
+case E_V4HImode:
+  sse_mode = V8HImode;
+  double_sse_mode = V16HImode;
+  mask = gen_rtx_PARALLEL (VOIDmode,
+  gen_rtvec (8,
+ GEN_INT (0), GEN_INT (8),
+ GEN_INT (1), GEN_INT (9),
+ GEN_INT (2), GEN_INT (10),
+ GEN_INT (3), GEN_INT (11)));
+  break;
+
+case E_V2SImode:
+  sse_mode = V4SImode;
+  double_sse_mode = V8SImode;
+  mask = gen_rtx_PARALLEL (VOIDmode,
+  gen_rtvec (4,
+ GEN_INT (0), GEN_INT (4),
+ GEN_INT (1), GEN_INT (5)));
+  break;
+
+default:
+  gcc_unreachable ();
+}
+
+  /* Generate SSE punpcklXX.  */
+  rtx dest = lowpart_subreg (sse_mode, op0, GET_MODE (op0));
+  op1 = lowpart_subreg (sse_mode, op1, GET_MODE (op1));
+  op2 = lowpart_subreg (sse_mode, op2, GET_MODE (op2));
+
+  op1 = gen_rtx_VEC_CONCAT (double_sse_mode, op1, op2);
+  op2 = gen_rtx_VEC_SELECT (sse_mode, op1, mask);
+  rtx insn = gen_rtx_SET (dest, op2);
+  emit_insn (insn);
+
+  if (high_p)
+{
+  /* Move bits 64:127 to bits 0:63.  */
+  mask = gen_rtx_PARALLEL (VOIDmode,
+  gen_rtvec (4, GEN_INT (2), GEN_INT (3),
+ GEN_INT (0), GEN_INT (0)));
+  dest = lowpart_subreg (V4SImode, dest, GET_MODE (dest));
+  op1 = gen_rtx_VEC_SELECT (V4SImode, dest, mask);
+  insn = gen_rtx_SET (dest, op1);
+  emit_insn (insn);
+}
+}
+
 /* Helper function of ix86_fixup_binary_operands to canonicalize
operand order.  Returns true if the operands should be swapped.  */
 
diff --git a/gcc/config/i386/mmx.md b/gcc/config/i386/mmx.md
index 63a390923b6..0aa793395fb 100644
--- a/gcc/config/i386/mmx.md
+++ b/gcc/config/i386/mmx.md
@@ -1064,87 +1064,129 @@
(set_attr "type" "mmxshft,sselog,sselog")
(set_attr "mode" "DI,TI,TI")])
 
-(define_insn "mmx_punpckhbw"
-  [(set (match_operand:V8QI 0 "register_operand" "=y")
+(define_insn_and_split "mmx_punpckhbw"
+  [(set (match_operand:V8QI 0 "register_opera

[PATCH 01/41] i386: Allow MMX register modes in SSE registers

2019-02-16 Thread H.J. Lu
In 64-bit mode, SSE2 can be used to emulate MMX instructions without
3DNOW.  We can use SSE2 to support MMX register modes.

PR target/89021
* config/i386/i386-c.c (ix86_target_macros_internal): Define
__MMX_WITH_SSE__ for TARGET_MMX_WITH_SSE.
* config/i386/i386.c (ix86_set_reg_reg_cost): Add support for
TARGET_MMX_WITH_SSE with VALID_MMX_REG_MODE.
(ix86_vector_mode_supported_p): Likewise.
* config/i386/i386.h (TARGET_MMX_WITH_SSE): New.
---
 gcc/config/i386/i386-c.c | 2 ++
 gcc/config/i386/i386.c   | 5 +++--
 gcc/config/i386/i386.h   | 2 ++
 3 files changed, 7 insertions(+), 2 deletions(-)

diff --git a/gcc/config/i386/i386-c.c b/gcc/config/i386/i386-c.c
index 5e7e46fcebe..213e1b56c6b 100644
--- a/gcc/config/i386/i386-c.c
+++ b/gcc/config/i386/i386-c.c
@@ -548,6 +548,8 @@ ix86_target_macros_internal (HOST_WIDE_INT isa_flag,
 def_or_undef (parse_in, "__CLDEMOTE__");
   if (isa_flag2 & OPTION_MASK_ISA_PTWRITE)
 def_or_undef (parse_in, "__PTWRITE__");
+  if (TARGET_MMX_WITH_SSE)
+def_or_undef (parse_in, "__MMX_WITH_SSE__");
   if (TARGET_IAMCU)
 {
   def_or_undef (parse_in, "__iamcu");
diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c
index 3e5f52175d2..7d7dd80930e 100644
--- a/gcc/config/i386/i386.c
+++ b/gcc/config/i386/i386.c
@@ -40490,7 +40490,8 @@ ix86_set_reg_reg_cost (machine_mode mode)
  || (TARGET_AVX && VALID_AVX256_REG_MODE (mode))
  || (TARGET_SSE2 && VALID_SSE2_REG_MODE (mode))
  || (TARGET_SSE && VALID_SSE_REG_MODE (mode))
- || (TARGET_MMX && VALID_MMX_REG_MODE (mode)))
+ || ((TARGET_MMX || TARGET_MMX_WITH_SSE)
+ && VALID_MMX_REG_MODE (mode)))
units = GET_MODE_SIZE (mode);
 }
 
@@ -44316,7 +44317,7 @@ ix86_vector_mode_supported_p (machine_mode mode)
 return true;
   if (TARGET_AVX512F && VALID_AVX512F_REG_MODE (mode))
 return true;
-  if (TARGET_MMX && VALID_MMX_REG_MODE (mode))
+  if ((TARGET_MMX ||TARGET_MMX_WITH_SSE) && VALID_MMX_REG_MODE (mode))
 return true;
   if (TARGET_3DNOW && VALID_MMX_REG_MODE_3DNOW (mode))
 return true;
diff --git a/gcc/config/i386/i386.h b/gcc/config/i386/i386.h
index 4fd8bc40a34..91b233022c2 100644
--- a/gcc/config/i386/i386.h
+++ b/gcc/config/i386/i386.h
@@ -201,6 +201,8 @@ see the files COPYING3 and COPYING.RUNTIME respectively.  
If not, see
 #define TARGET_16BIT   TARGET_CODE16
 #define TARGET_16BIT_P(x)  TARGET_CODE16_P(x)
 
+#define TARGET_MMX_WITH_SSE (TARGET_64BIT && TARGET_SSE2)
+
 #include "config/vxworks-dummy.h"
 
 #include "config/i386/i386-opts.h"
-- 
2.20.1



[PATCH 04/41] i386: Emulate MMX plusminus/sat_plusminus with SSE

2019-02-16 Thread H.J. Lu
Emulate MMX plusminus/sat_plusminus with SSE.  Only SSE register source
operand is allowed.

PR target/89021
* config/i386/mmx.md (MMXMODEI8): Require TARGET_SSE2 for V1DI.
(plusminus:mmx_3): Check
TARGET_MMX_WITH_SSE.
(sat_plusminus:mmx_3): Likewise.
(3): New.
(*mmx_3): Add SSE emulation.
(*mmx_3): Likewise.
---
 gcc/config/i386/mmx.md | 59 +++---
 1 file changed, 38 insertions(+), 21 deletions(-)

diff --git a/gcc/config/i386/mmx.md b/gcc/config/i386/mmx.md
index 0aa793395fb..587e31b299e 100644
--- a/gcc/config/i386/mmx.md
+++ b/gcc/config/i386/mmx.md
@@ -45,7 +45,7 @@
 
 ;; 8 byte integral modes handled by MMX (and by extension, SSE)
 (define_mode_iterator MMXMODEI [V8QI V4HI V2SI])
-(define_mode_iterator MMXMODEI8 [V8QI V4HI V2SI V1DI])
+(define_mode_iterator MMXMODEI8 [V8QI V4HI V2SI (V1DI "TARGET_SSE2")])
 
 ;; All 8-byte vector modes handled by MMX
 (define_mode_iterator MMXMODE [V8QI V4HI V2SI V1DI V2SF])
@@ -663,39 +663,56 @@
 (define_expand "mmx_3"
   [(set (match_operand:MMXMODEI8 0 "register_operand")
(plusminus:MMXMODEI8
- (match_operand:MMXMODEI8 1 "nonimmediate_operand")
- (match_operand:MMXMODEI8 2 "nonimmediate_operand")))]
-  "TARGET_MMX || (TARGET_SSE2 && mode == V1DImode)"
+ (match_operand:MMXMODEI8 1 "register_mmxmem_operand")
+ (match_operand:MMXMODEI8 2 "register_mmxmem_operand")))]
+  "TARGET_MMX || TARGET_MMX_WITH_SSE"
+  "ix86_fixup_binary_operands_no_copy (, mode, operands);")
+
+(define_expand "3"
+  [(set (match_operand:MMXMODEI 0 "register_operand")
+   (plusminus:MMXMODEI
+ (match_operand:MMXMODEI 1 "register_operand")
+ (match_operand:MMXMODEI 2 "register_operand")))]
+  "TARGET_MMX_WITH_SSE"
   "ix86_fixup_binary_operands_no_copy (, mode, operands);")
 
 (define_insn "*mmx_3"
-  [(set (match_operand:MMXMODEI8 0 "register_operand" "=y")
+  [(set (match_operand:MMXMODEI8 0 "register_operand" "=y,x,Yv")
 (plusminus:MMXMODEI8
- (match_operand:MMXMODEI8 1 "nonimmediate_operand" "0")
- (match_operand:MMXMODEI8 2 "nonimmediate_operand" "ym")))]
-  "(TARGET_MMX || (TARGET_SSE2 && mode == V1DImode))
+ (match_operand:MMXMODEI8 1 "register_mmxmem_operand" "0,0,Yv")
+ (match_operand:MMXMODEI8 2 "register_mmxmem_operand" "ym,x,Yv")))]
+  "(TARGET_MMX || TARGET_MMX_WITH_SSE)
&& ix86_binary_operator_ok (, mode, operands)"
-  "p\t{%2, %0|%0, %2}"
-  [(set_attr "type" "mmxadd")
-   (set_attr "mode" "DI")])
+  "@
+   p\t{%2, %0|%0, %2}
+   p\t{%2, %0|%0, %2}
+   vp\t{%2, %1, %0|%0, %1, %2}"
+  [(set_attr "mmx_isa" "native,x64_noavx,x64_avx")
+   (set_attr "type" "mmxadd,sseadd,sseadd")
+   (set_attr "mode" "DI,TI,TI")])
 
 (define_expand "mmx_3"
   [(set (match_operand:MMXMODE12 0 "register_operand")
(sat_plusminus:MMXMODE12
- (match_operand:MMXMODE12 1 "nonimmediate_operand")
- (match_operand:MMXMODE12 2 "nonimmediate_operand")))]
-  "TARGET_MMX"
+ (match_operand:MMXMODE12 1 "register_mmxmem_operand")
+ (match_operand:MMXMODE12 2 "register_mmxmem_operand")))]
+  "TARGET_MMX || TARGET_MMX_WITH_SSE"
   "ix86_fixup_binary_operands_no_copy (, mode, operands);")
 
 (define_insn "*mmx_3"
-  [(set (match_operand:MMXMODE12 0 "register_operand" "=y")
+  [(set (match_operand:MMXMODE12 0 "register_operand" "=y,x,Yv")
 (sat_plusminus:MMXMODE12
- (match_operand:MMXMODE12 1 "nonimmediate_operand" "0")
- (match_operand:MMXMODE12 2 "nonimmediate_operand" "ym")))]
-  "TARGET_MMX && ix86_binary_operator_ok (, mode, operands)"
-  "p\t{%2, %0|%0, %2}"
-  [(set_attr "type" "mmxadd")
-   (set_attr "mode" "DI")])
+ (match_operand:MMXMODE12 1 "register_mmxmem_operand" "0,0,Yv")
+ (match_operand:MMXMODE12 2 "register_mmxmem_operand" "ym,x,Yv")))]
+  "(TARGET_MMX || TARGET_MMX_WITH_SSE)
+   && ix86_binary_operator_ok (, mode, operands)"
+  "@
+   p\t{%2, %0|%0, %2}
+   p\t{%2, %0|%0, %2}
+   vp\t{%2, %1, %0|%0, %1, %2}"
+  [(set_attr "mmx_isa" "native,x64_noavx,x64_avx")
+   (set_attr "type" "mmxadd,sseadd,sseadd")
+   (set_attr "mode" "DI,TI,TI")])
 
 (define_expand "mmx_mulv4hi3"
   [(set (match_operand:V4HI 0 "register_operand")
-- 
2.20.1



[PATCH 05/41] i386: Emulate MMX mulv4hi3 with SSE

2019-02-16 Thread H.J. Lu
Emulate MMX mulv4hi3 with SSE.  Only SSE register source operand is
allowed.

PR target/89021
* config/i386/mmx.md (mmx_mulv4hi3): Also allow
TARGET_MMX_WITH_SSE.
(mulv4hi3): New.
(*mmx_mulv4hi3): Also allow TARGET_MMX_WITH_SSE.  Add SSE
support.
---
 gcc/config/i386/mmx.md | 32 ++--
 1 file changed, 22 insertions(+), 10 deletions(-)

diff --git a/gcc/config/i386/mmx.md b/gcc/config/i386/mmx.md
index 587e31b299e..fd0189eae60 100644
--- a/gcc/config/i386/mmx.md
+++ b/gcc/config/i386/mmx.md
@@ -716,19 +716,31 @@
 
 (define_expand "mmx_mulv4hi3"
   [(set (match_operand:V4HI 0 "register_operand")
-(mult:V4HI (match_operand:V4HI 1 "nonimmediate_operand")
-  (match_operand:V4HI 2 "nonimmediate_operand")))]
-  "TARGET_MMX"
+(mult:V4HI (match_operand:V4HI 1 "register_mmxmem_operand")
+  (match_operand:V4HI 2 "register_mmxmem_operand")))]
+  "TARGET_MMX || TARGET_MMX_WITH_SSE"
+  "ix86_fixup_binary_operands_no_copy (MULT, V4HImode, operands);")
+
+(define_expand "mulv4hi3"
+  [(set (match_operand:V4HI 0 "register_operand")
+(mult:V4HI (match_operand:V4HI 1 "register_operand")
+  (match_operand:V4HI 2 "register_operand")))]
+  "TARGET_MMX_WITH_SSE"
   "ix86_fixup_binary_operands_no_copy (MULT, V4HImode, operands);")
 
 (define_insn "*mmx_mulv4hi3"
-  [(set (match_operand:V4HI 0 "register_operand" "=y")
-(mult:V4HI (match_operand:V4HI 1 "nonimmediate_operand" "%0")
-  (match_operand:V4HI 2 "nonimmediate_operand" "ym")))]
-  "TARGET_MMX && ix86_binary_operator_ok (MULT, V4HImode, operands)"
-  "pmullw\t{%2, %0|%0, %2}"
-  [(set_attr "type" "mmxmul")
-   (set_attr "mode" "DI")])
+  [(set (match_operand:V4HI 0 "register_operand" "=y,x,Yv")
+(mult:V4HI (match_operand:V4HI 1 "register_mmxmem_operand" "%0,0,Yv")
+  (match_operand:V4HI 2 "register_mmxmem_operand" "ym,x,Yv")))]
+  "(TARGET_MMX || TARGET_MMX_WITH_SSE)
+   && ix86_binary_operator_ok (MULT, V4HImode, operands)"
+  "@
+   pmullw\t{%2, %0|%0, %2}
+   pmullw\t{%2, %0|%0, %2}
+   vpmullw\t{%2, %1, %0|%0, %1, %2}"
+  [(set_attr "mmx_isa" "native,x64_noavx,x64_avx")
+   (set_attr "type" "mmxmul,ssemul,ssemul")
+   (set_attr "mode" "DI,TI,TI")])
 
 (define_expand "mmx_smulv4hi3_highpart"
   [(set (match_operand:V4HI 0 "register_operand")
-- 
2.20.1



[PATCH 00/41] V8: Emulate MMX intrinsics with SSE

2019-02-16 Thread H.J. Lu
On x86-64, since __m64 is returned and passed in XMM registers, we can
emulate MMX intrinsics with SSE instructions. To support it, we added

 #define TARGET_MMX_WITH_SSE (TARGET_64BIT && TARGET_SSE2)

;; Define instruction set of MMX instructions
(define_attr "mmx_isa" "base,native,x64,x64_noavx,x64_avx"
  (const_string "base"))

 (eq_attr "mmx_isa" "native")
   (symbol_ref "!TARGET_MMX_WITH_SSE")
 (eq_attr "mmx_isa" "x64")
   (symbol_ref "TARGET_MMX_WITH_SSE")
 (eq_attr "mmx_isa" "x64_avx")
   (symbol_ref "TARGET_MMX_WITH_SSE && TARGET_AVX")
 (eq_attr "mmx_isa" "x64_noavx")
   (symbol_ref "TARGET_MMX_WITH_SSE && !TARGET_AVX")

We added SSE emulation to MMX patterns and disabled MMX alternatives with
TARGET_MMX_WITH_SSE.

Most of MMX instructions have equivalent SSE versions and results of some
SSE versions need to be reshuffled to the right order for MMX.  There are
a couple of tricky cases:

1. MMX maskmovq and SSE2 maskmovdqu aren't equivalent.  We emulate MMX
maskmovq with SSE2 maskmovdqu by zeroing out the upper 64 bits of the
mask operand and handle unmapped bits 64:127 at memory address by
adjusting source and mask operands together with memory address.

2. MMX movntq is emulated with SSE2 DImode movnti, which is available
in 64-bit mode.

3. MMX pshufb takes a 3-bit index while SSE pshufb takes a 4-bit index.
SSE emulation must clear bit 3 of each index in the shuffle control mask.

4. To emulate MMX cvtpi2ps with SSE2 cvtdq2ps, we must properly preserve
the upper 64 bits of destination XMM register.

Tests are also added to check each SSE emulation of MMX intrinsics.

There are no regressions on i686 and x86-64.  For x86-64, GCC is also
tested with

--with-arch=native --with-cpu=native

on AVX2 and AVX512F machines.

H.J. Lu (40):
  i386: Allow MMX register modes in SSE registers
  i386: Emulate MMX packsswb/packssdw/packuswb with SSE2
  i386: Emulate MMX punpcklXX/punpckhXX with SSE punpcklXX
  i386: Emulate MMX plusminus/sat_plusminus with SSE
  i386: Emulate MMX mulv4hi3 with SSE
  i386: Emulate MMX smulv4hi3_highpart with SSE
  i386: Emulate MMX mmx_pmaddwd with SSE
  i386: Emulate MMX ashr3/3 with SSE
  i386: Emulate MMX 3 with SSE
  i386: Emulate MMX mmx_andnot3 with SSE
  i386: Emulate MMX mmx_eq/mmx_gt3 with SSE
  i386: Emulate MMX vec_dupv2si with SSE
  i386: Emulate MMX pshufw with SSE
  i386: Emulate MMX sse_cvtps2pi/sse_cvttps2pi with SSE
  i386: Emulate MMX sse_cvtpi2ps with SSE
  i386: Emulate MMX mmx_pextrw with SSE
  i386: Emulate MMX mmx_pinsrw with SSE
  i386: Emulate MMX V4HI smaxmin/V8QI umaxmin with SSE
  i386: Emulate MMX mmx_pmovmskb with SSE
  i386: Emulate MMX mmx_umulv4hi3_highpart with SSE
  i386: Emulate MMX maskmovq with SSE2 maskmovdqu
  i386: Emulate MMX mmx_uavgv8qi3 with SSE
  i386: Emulate MMX mmx_uavgv4hi3 with SSE
  i386: Emulate MMX mmx_psadbw with SSE
  i386: Emulate MMX movntq with SSE2 movntidi
  i386: Emulate MMX umulv1siv1di3 with SSE2
  i386: Make _mm_empty () as NOP for TARGET_MMX_WITH_SSE
  i386: Emulate MMX ssse3_phwv4hi3 with SSE
  i386: Emulate MMX ssse3_phdv2si3 with SSE
  i386: Emulate MMX ssse3_pmaddubsw with SSE
  i386: Emulate MMX ssse3_pmulhrswv4hi3 with SSE
  i386: Emulate MMX pshufb with SSE version
  i386: Emulate MMX ssse3_psign3 with SSE
  i386: Emulate MMX ssse3_palignrdi with SSE
  i386: Emulate MMX abs2 with SSE
  i386: Allow MMXMODE moves with TARGET_MMX_WITH_SSE
  i386: Allow MMX vector expanders with TARGET_MMX_WITH_SSE
  i386: Allow MMX intrinsic emulation with SSE
  i386: Enable TM MMX intrinsics with SSE2
  i386: Add tests for MMX intrinsic emulations with SSE

Uros Bizjak (1):
  Prevent allocation of MMX registers with TARGET_MMX_WITH_SSE

 gcc/config/i386/constraints.md|   6 +
 gcc/config/i386/i386-builtin.def  | 126 +--
 gcc/config/i386/i386-c.c  |   2 +
 gcc/config/i386/i386-protos.h |   4 +
 gcc/config/i386/i386.c| 189 +++-
 gcc/config/i386/i386.h|   2 +
 gcc/config/i386/i386.md   |  17 +
 gcc/config/i386/mmintrin.h|  12 +-
 gcc/config/i386/mmx.md| 984 --
 gcc/config/i386/predicates.md |   7 +
 gcc/config/i386/sse.md| 359 +--
 gcc/config/i386/xmmintrin.h   |  61 ++
 gcc/testsuite/gcc.target/i386/mmx-vals.h  |  77 ++
 gcc/testsuite/gcc.target/i386/pr82483-1.c |   2 +-
 gcc/testsuite/gcc.target/i386/pr82483-2.c |   2 +-
 gcc/testsuite/gcc.target/i386/sse2-mmx-10.c   |  43 +
 gcc/testsuite/gcc.target/i386/sse2-mmx-11.c   |  39 +
 gcc/testsuite/gcc.target/i386/sse2-mmx-12.c   |  42 +
 gcc/testsuite/gcc.target/i386/sse2-mmx-13.c   |  40 +
 gcc/testsuite/gcc.target/i386/sse2-mmx-14.c   |  31 +
 gcc/testsuite/gcc.target/i386/sse2-mmx-15.c   |  36 +
 gcc/testsuite/gcc.target/i386/sse2-mmx-16.c   |  40 +
 gcc/testsuite/gcc.targe

[PATCH 07/41] i386: Emulate MMX mmx_pmaddwd with SSE

2019-02-16 Thread H.J. Lu
Emulate MMX pmaddwd with SSE.  Only SSE register source operand is
allowed.

PR target/89021
* config/i386/mmx.md (mmx_pmaddwd): Also allow TARGET_MMX_WITH_SSE.
(*mmx_pmaddwd): Also allow TARGET_MMX_WITH_SSE.  Add SSE support.
---
 gcc/config/i386/mmx.md | 25 +++--
 1 file changed, 15 insertions(+), 10 deletions(-)

diff --git a/gcc/config/i386/mmx.md b/gcc/config/i386/mmx.md
index 01c80602b5b..fe746a487d1 100644
--- a/gcc/config/i386/mmx.md
+++ b/gcc/config/i386/mmx.md
@@ -810,11 +810,11 @@
  (mult:V2SI
(sign_extend:V2SI
  (vec_select:V2HI
-   (match_operand:V4HI 1 "nonimmediate_operand")
+   (match_operand:V4HI 1 "register_mmxmem_operand")
(parallel [(const_int 0) (const_int 2)])))
(sign_extend:V2SI
  (vec_select:V2HI
-   (match_operand:V4HI 2 "nonimmediate_operand")
+   (match_operand:V4HI 2 "register_mmxmem_operand")
(parallel [(const_int 0) (const_int 2)]
  (mult:V2SI
(sign_extend:V2SI
@@ -823,20 +823,20 @@
(sign_extend:V2SI
  (vec_select:V2HI (match_dup 2)
(parallel [(const_int 1) (const_int 3)]))]
-  "TARGET_MMX"
+  "TARGET_MMX || TARGET_MMX_WITH_SSE"
   "ix86_fixup_binary_operands_no_copy (MULT, V4HImode, operands);")
 
 (define_insn "*mmx_pmaddwd"
-  [(set (match_operand:V2SI 0 "register_operand" "=y")
+  [(set (match_operand:V2SI 0 "register_operand" "=y,x,Yv")
 (plus:V2SI
  (mult:V2SI
(sign_extend:V2SI
  (vec_select:V2HI
-   (match_operand:V4HI 1 "nonimmediate_operand" "%0")
+   (match_operand:V4HI 1 "register_mmxmem_operand" "%0,0,Yv")
(parallel [(const_int 0) (const_int 2)])))
(sign_extend:V2SI
  (vec_select:V2HI
-   (match_operand:V4HI 2 "nonimmediate_operand" "ym")
+   (match_operand:V4HI 2 "register_mmxmem_operand" "ym,x,Yv")
(parallel [(const_int 0) (const_int 2)]
  (mult:V2SI
(sign_extend:V2SI
@@ -845,10 +845,15 @@
(sign_extend:V2SI
  (vec_select:V2HI (match_dup 2)
(parallel [(const_int 1) (const_int 3)]))]
-  "TARGET_MMX && ix86_binary_operator_ok (MULT, V4HImode, operands)"
-  "pmaddwd\t{%2, %0|%0, %2}"
-  [(set_attr "type" "mmxmul")
-   (set_attr "mode" "DI")])
+  "(TARGET_MMX || TARGET_MMX_WITH_SSE)
+   && ix86_binary_operator_ok (MULT, V4HImode, operands)"
+  "@
+   pmaddwd\t{%2, %0|%0, %2}
+   pmaddwd\t{%2, %0|%0, %2}
+   vpmaddwd\t{%2, %1, %0|%0, %1, %2}"
+  [(set_attr "mmx_isa" "native,x64_noavx,x64_avx")
+   (set_attr "type" "mmxmul,sseiadd,sseiadd")
+   (set_attr "mode" "DI,TI,TI")])
 
 (define_expand "mmx_pmulhrwv4hi3"
   [(set (match_operand:V4HI 0 "register_operand")
-- 
2.20.1



[PATCH 06/41] i386: Emulate MMX smulv4hi3_highpart with SSE

2019-02-16 Thread H.J. Lu
Emulate MMX smulv4hi3_highpart with SSE.  Only SSE register source operand is
allowed.

PR target/89021
* config/i386/mmx.md (mmx_smulv4hi3_highpart): Also allow
TARGET_MMX_WITH_SSE.
(*mmx_smulv4hi3_highpart): Also allow TARGET_MMX_WITH_SSE. Add
SSE support.
---
 gcc/config/i386/mmx.md | 25 +++--
 1 file changed, 15 insertions(+), 10 deletions(-)

diff --git a/gcc/config/i386/mmx.md b/gcc/config/i386/mmx.md
index fd0189eae60..01c80602b5b 100644
--- a/gcc/config/i386/mmx.md
+++ b/gcc/config/i386/mmx.md
@@ -748,27 +748,32 @@
  (lshiftrt:V4SI
(mult:V4SI
  (sign_extend:V4SI
-   (match_operand:V4HI 1 "nonimmediate_operand"))
+   (match_operand:V4HI 1 "register_mmxmem_operand"))
  (sign_extend:V4SI
-   (match_operand:V4HI 2 "nonimmediate_operand")))
+   (match_operand:V4HI 2 "register_mmxmem_operand")))
(const_int 16]
-  "TARGET_MMX"
+  "TARGET_MMX || TARGET_MMX_WITH_SSE"
   "ix86_fixup_binary_operands_no_copy (MULT, V4HImode, operands);")
 
 (define_insn "*mmx_smulv4hi3_highpart"
-  [(set (match_operand:V4HI 0 "register_operand" "=y")
+  [(set (match_operand:V4HI 0 "register_operand" "=y,x,Yv")
(truncate:V4HI
  (lshiftrt:V4SI
(mult:V4SI
  (sign_extend:V4SI
-   (match_operand:V4HI 1 "nonimmediate_operand" "%0"))
+   (match_operand:V4HI 1 "register_mmxmem_operand" "%0,0,Yv"))
  (sign_extend:V4SI
-   (match_operand:V4HI 2 "nonimmediate_operand" "ym")))
+   (match_operand:V4HI 2 "register_mmxmem_operand" "ym,x,Yv")))
(const_int 16]
-  "TARGET_MMX && ix86_binary_operator_ok (MULT, V4HImode, operands)"
-  "pmulhw\t{%2, %0|%0, %2}"
-  [(set_attr "type" "mmxmul")
-   (set_attr "mode" "DI")])
+  "(TARGET_MMX || TARGET_MMX_WITH_SSE)
+   && ix86_binary_operator_ok (MULT, V4HImode, operands)"
+  "@
+   pmulhw\t{%2, %0|%0, %2}
+   pmulhw\t{%2, %0|%0, %2}
+   vpmulhw\t{%2, %1, %0|%0, %1, %2}"
+  [(set_attr "mmx_isa" "native,x64_noavx,x64_avx")
+   (set_attr "type" "mmxmul,ssemul,ssemul")
+   (set_attr "mode" "DI,TI,TI")])
 
 (define_expand "mmx_umulv4hi3_highpart"
   [(set (match_operand:V4HI 0 "register_operand")
-- 
2.20.1



[PATCH 02/41] i386: Emulate MMX packsswb/packssdw/packuswb with SSE2

2019-02-16 Thread H.J. Lu
Emulate MMX packsswb/packssdw/packuswb with SSE packsswb/packssdw/packuswb
plus moving bits 64:95 to bits 32:63 in SSE register.  Only SSE register
source operand is allowed.

2019-02-08  H.J. Lu  
Uros Bizjak  

PR target/89021
* config/i386/i386-protos.h (ix86_move_vector_high_sse_to_mmx):
New prototype.
(ix86_split_mmx_pack): Likewise.
* config/i386/i386.c (ix86_move_vector_high_sse_to_mmx): New
function.
(ix86_split_mmx_pack): Likewise.
* config/i386/i386.md (mmx_isa): New.
(enabled): Also check mmx_isa.
* config/i386/mmx.md (any_s_truncate): New code iterator.
(s_trunsuffix): New code attr.
(mmx_packsswb): Removed.
(mmx_packssdw): Likewise.
(mmx_packuswb): Likewise.
(mmx_packswb): New define_insn_and_split to emulate
MMX packsswb/packuswb with SSE2.
(mmx_packssdw): Likewise.
* config/i386/predicates.md (register_mmxmem_operand): New.
---
 gcc/config/i386/i386-protos.h |  3 ++
 gcc/config/i386/i386.c| 54 
 gcc/config/i386/i386.md   | 13 +++
 gcc/config/i386/mmx.md| 67 +++
 gcc/config/i386/predicates.md |  7 
 5 files changed, 114 insertions(+), 30 deletions(-)

diff --git a/gcc/config/i386/i386-protos.h b/gcc/config/i386/i386-protos.h
index 27f5cc13abf..a53b48438ec 100644
--- a/gcc/config/i386/i386-protos.h
+++ b/gcc/config/i386/i386-protos.h
@@ -202,6 +202,9 @@ extern void ix86_expand_vecop_qihi (enum rtx_code, rtx, 
rtx, rtx);
 
 extern rtx ix86_split_stack_guard (void);
 
+extern void ix86_move_vector_high_sse_to_mmx (rtx);
+extern void ix86_split_mmx_pack (rtx[], enum rtx_code);
+
 #ifdef TREE_CODE
 extern void init_cumulative_args (CUMULATIVE_ARGS *, tree, rtx, tree, int);
 #endif /* TREE_CODE  */
diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c
index 7d7dd80930e..d31b69d9a82 100644
--- a/gcc/config/i386/i386.c
+++ b/gcc/config/i386/i386.c
@@ -20221,6 +20221,60 @@ ix86_expand_vector_move_misalign (machine_mode mode, 
rtx operands[])
 gcc_unreachable ();
 }
 
+/* Move bits 64:95 to bits 32:63.  */
+
+void
+ix86_move_vector_high_sse_to_mmx (rtx op)
+{
+  rtx mask = gen_rtx_PARALLEL (VOIDmode,
+  gen_rtvec (4, GEN_INT (0), GEN_INT (2),
+ GEN_INT (0), GEN_INT (0)));
+  rtx dest = lowpart_subreg (V4SImode, op, GET_MODE (op));
+  op = gen_rtx_VEC_SELECT (V4SImode, dest, mask);
+  rtx insn = gen_rtx_SET (dest, op);
+  emit_insn (insn);
+}
+
+/* Split MMX pack with signed/unsigned saturation with SSE/SSE2.  */
+
+void
+ix86_split_mmx_pack (rtx operands[], enum rtx_code code)
+{
+  rtx op0 = operands[0];
+  rtx op1 = operands[1];
+  rtx op2 = operands[2];
+
+  machine_mode dmode = GET_MODE (op0);
+  machine_mode smode = GET_MODE (op1);
+  machine_mode inner_dmode = GET_MODE_INNER (dmode);
+  machine_mode inner_smode = GET_MODE_INNER (smode);
+
+  /* Get the corresponding SSE mode for destination.  */
+  int nunits = 16 / GET_MODE_SIZE (inner_dmode);
+  machine_mode sse_dmode = mode_for_vector (GET_MODE_INNER (dmode),
+   nunits).require ();
+  machine_mode sse_half_dmode = mode_for_vector (GET_MODE_INNER (dmode),
+nunits / 2).require ();
+
+  /* Get the corresponding SSE mode for source.  */
+  nunits = 16 / GET_MODE_SIZE (inner_smode);
+  machine_mode sse_smode = mode_for_vector (GET_MODE_INNER (smode),
+   nunits).require ();
+
+  /* Generate SSE pack with signed/unsigned saturation.  */
+  rtx dest = lowpart_subreg (sse_dmode, op0, GET_MODE (op0));
+  op1 = lowpart_subreg (sse_smode, op1, GET_MODE (op1));
+  op2 = lowpart_subreg (sse_smode, op2, GET_MODE (op2));
+
+  op1 = gen_rtx_fmt_e (code, sse_half_dmode, op1);
+  op2 = gen_rtx_fmt_e (code, sse_half_dmode, op2);
+  rtx insn = gen_rtx_SET (dest, gen_rtx_VEC_CONCAT (sse_dmode,
+   op1, op2));
+  emit_insn (insn);
+
+  ix86_move_vector_high_sse_to_mmx (op0);
+}
+
 /* Helper function of ix86_fixup_binary_operands to canonicalize
operand order.  Returns true if the operands should be swapped.  */
 
diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md
index 240384917df..04ec0eeaa57 100644
--- a/gcc/config/i386/i386.md
+++ b/gcc/config/i386/i386.md
@@ -792,6 +792,10 @@
avx512vl,noavx512vl,x64_avx512dq,x64_avx512bw"
   (const_string "base"))
 
+;; Define instruction set of MMX instructions
+(define_attr "mmx_isa" "base,native,x64,x64_noavx,x64_avx"
+  (const_string "base"))
+
 (define_attr "enabled" ""
   (cond [(eq_attr "isa" "x64") (symbol_ref "TARGET_64BIT")
 (eq_attr "isa" "x64_sse2")
@@ -830,6 +834,15 @@
 (eq_attr "isa" "noavx512dq") (symbol_ref "!TARGET_AVX512DQ")
 (eq_attr "isa" "avx512vl") (symbol_

[PATCH 08/41] i386: Emulate MMX ashr3/3 with SSE

2019-02-16 Thread H.J. Lu
Emulate MMX ashr<mode>3/<shift_insn><mode>3 with SSE.  Only SSE register
source operand is allowed.

PR target/89021
* config/i386/mmx.md (mmx_ashr<mode>3): Also allow
TARGET_MMX_WITH_SSE.  Add SSE emulation.
(mmx_<shift_insn><mode>3): Likewise.
(ashr<mode>3): New.
(<shift_insn><mode>3): Likewise.
---
 gcc/config/i386/mmx.md | 50 ++
 1 file changed, 36 insertions(+), 14 deletions(-)

diff --git a/gcc/config/i386/mmx.md b/gcc/config/i386/mmx.md
index fe746a487d1..6af05a1881e 100644
--- a/gcc/config/i386/mmx.md
+++ b/gcc/config/i386/mmx.md
@@ -959,32 +959,54 @@
(set_attr "mode" "DI")])
 
 (define_insn "mmx_ashr3"
-  [(set (match_operand:MMXMODE24 0 "register_operand" "=y")
+  [(set (match_operand:MMXMODE24 0 "register_operand" "=y,x,Yv")
 (ashiftrt:MMXMODE24
- (match_operand:MMXMODE24 1 "register_operand" "0")
- (match_operand:DI 2 "nonmemory_operand" "yN")))]
-  "TARGET_MMX"
-  "psra\t{%2, %0|%0, %2}"
-  [(set_attr "type" "mmxshft")
+ (match_operand:MMXMODE24 1 "register_operand" "0,0,Yv")
+ (match_operand:DI 2 "nonmemory_operand" "yN,xN,YvN")))]
+  "TARGET_MMX || TARGET_MMX_WITH_SSE"
+  "@
+   psra\t{%2, %0|%0, %2}
+   psra\t{%2, %0|%0, %2}
+   vpsra\t{%2, %1, %0|%0, %1, %2}"
+  [(set_attr "mmx_isa" "native,x64_noavx,x64_avx")
+   (set_attr "type" "mmxshft,sseishft,sseishft")
(set (attr "length_immediate")
  (if_then_else (match_operand 2 "const_int_operand")
(const_string "1")
(const_string "0")))
-   (set_attr "mode" "DI")])
+   (set_attr "mode" "DI,TI,TI")])
+
+(define_expand "ashr3"
+  [(set (match_operand:MMXMODE24 0 "register_operand")
+(ashiftrt:MMXMODE24
+ (match_operand:MMXMODE24 1 "register_operand")
+ (match_operand:DI 2 "nonmemory_operand")))]
+  "TARGET_MMX_WITH_SSE")
 
 (define_insn "mmx_3"
-  [(set (match_operand:MMXMODE248 0 "register_operand" "=y")
+  [(set (match_operand:MMXMODE248 0 "register_operand" "=y,x,Yv")
 (any_lshift:MMXMODE248
- (match_operand:MMXMODE248 1 "register_operand" "0")
- (match_operand:DI 2 "nonmemory_operand" "yN")))]
-  "TARGET_MMX"
-  "p\t{%2, %0|%0, %2}"
-  [(set_attr "type" "mmxshft")
+ (match_operand:MMXMODE248 1 "register_operand" "0,0,Yv")
+ (match_operand:DI 2 "nonmemory_operand" "yN,xN,YvN")))]
+  "TARGET_MMX || TARGET_MMX_WITH_SSE"
+  "@
+   p\t{%2, %0|%0, %2}
+   p\t{%2, %0|%0, %2}
+   vp\t{%2, %1, %0|%0, %1, %2}"
+  [(set_attr "mmx_isa" "native,x64_noavx,x64_avx")
+   (set_attr "type" "mmxshft,sseishft,sseishft")
(set (attr "length_immediate")
  (if_then_else (match_operand 2 "const_int_operand")
(const_string "1")
(const_string "0")))
-   (set_attr "mode" "DI")])
+   (set_attr "mode" "DI,TI,TI")])
+
+(define_expand "3"
+  [(set (match_operand:MMXMODE248 0 "register_operand")
+(any_lshift:MMXMODE248
+ (match_operand:MMXMODE248 1 "register_operand")
+ (match_operand:DI 2 "nonmemory_operand")))]
+  "TARGET_MMX_WITH_SSE")
 
 ;
 ;;
-- 
2.20.1



[PATCH 10/41] i386: Emulate MMX mmx_andnot3 with SSE

2019-02-16 Thread H.J. Lu
Emulate MMX mmx_andnot<mode>3 with SSE.  Only SSE register source operand
is allowed.

PR target/89021
* config/i386/mmx.md (mmx_andnot3): Also allow
TARGET_MMX_WITH_SSE.  Add SSE support.
---
 gcc/config/i386/mmx.md | 18 +++---
 1 file changed, 11 insertions(+), 7 deletions(-)

diff --git a/gcc/config/i386/mmx.md b/gcc/config/i386/mmx.md
index 33f6c2aa774..b3df46dd563 100644
--- a/gcc/config/i386/mmx.md
+++ b/gcc/config/i386/mmx.md
@@ -1049,14 +1049,18 @@
 ;
 
 (define_insn "mmx_andnot3"
-  [(set (match_operand:MMXMODEI 0 "register_operand" "=y")
+  [(set (match_operand:MMXMODEI 0 "register_operand" "=y,x,Yv")
(and:MMXMODEI
- (not:MMXMODEI (match_operand:MMXMODEI 1 "register_operand" "0"))
- (match_operand:MMXMODEI 2 "nonimmediate_operand" "ym")))]
-  "TARGET_MMX"
-  "pandn\t{%2, %0|%0, %2}"
-  [(set_attr "type" "mmxadd")
-   (set_attr "mode" "DI")])
+ (not:MMXMODEI (match_operand:MMXMODEI 1 "register_operand" "0,0,Yv"))
+ (match_operand:MMXMODEI 2 "register_mmxmem_operand" "ym,x,Yv")))]
+  "TARGET_MMX || TARGET_MMX_WITH_SSE"
+  "@
+   pandn\t{%2, %0|%0, %2}
+   pandn\t{%2, %0|%0, %2}
+   vpandn\t{%2, %1, %0|%0, %1, %2}"
+  [(set_attr "mmx_isa" "native,x64_noavx,x64_avx")
+   (set_attr "type" "mmxadd,sselog,sselog")
+   (set_attr "mode" "DI,TI,TI")])
 
 (define_expand "mmx_3"
   [(set (match_operand:MMXMODEI 0 "register_operand")
-- 
2.20.1



[PATCH 09/41] i386: Emulate MMX 3 with SSE

2019-02-16 Thread H.J. Lu
Emulate MMX any_logic pattern mmx_<code><mode>3 with SSE.  Only SSE register
source operand is allowed.

PR target/89021
* config/i386/mmx.md (any_logic:mmx_3): Also allow
TARGET_MMX_WITH_SSE.
(any_logic:3): New.
(any_logic:*mmx_3): Also allow TARGET_MMX_WITH_SSE.
Add SSE support.
---
 gcc/config/i386/mmx.md | 33 +++--
 1 file changed, 23 insertions(+), 10 deletions(-)

diff --git a/gcc/config/i386/mmx.md b/gcc/config/i386/mmx.md
index 6af05a1881e..33f6c2aa774 100644
--- a/gcc/config/i386/mmx.md
+++ b/gcc/config/i386/mmx.md
@@ -1061,20 +1061,33 @@
 (define_expand "mmx_3"
   [(set (match_operand:MMXMODEI 0 "register_operand")
(any_logic:MMXMODEI
- (match_operand:MMXMODEI 1 "nonimmediate_operand")
- (match_operand:MMXMODEI 2 "nonimmediate_operand")))]
-  "TARGET_MMX"
+ (match_operand:MMXMODEI 1 "register_mmxmem_operand")
+ (match_operand:MMXMODEI 2 "register_mmxmem_operand")))]
+  "TARGET_MMX || TARGET_MMX_WITH_SSE"
+  "ix86_fixup_binary_operands_no_copy (, mode, operands);")
+
+(define_expand "3"
+  [(set (match_operand:MMXMODEI 0 "register_operand")
+   (any_logic:MMXMODEI
+ (match_operand:MMXMODEI 1 "register_operand")
+ (match_operand:MMXMODEI 2 "register_operand")))]
+  "TARGET_MMX_WITH_SSE"
   "ix86_fixup_binary_operands_no_copy (, mode, operands);")
 
 (define_insn "*mmx_3"
-  [(set (match_operand:MMXMODEI 0 "register_operand" "=y")
+  [(set (match_operand:MMXMODEI 0 "register_operand" "=y,x,Yv")
 (any_logic:MMXMODEI
- (match_operand:MMXMODEI 1 "nonimmediate_operand" "%0")
- (match_operand:MMXMODEI 2 "nonimmediate_operand" "ym")))]
-  "TARGET_MMX && ix86_binary_operator_ok (, mode, operands)"
-  "p\t{%2, %0|%0, %2}"
-  [(set_attr "type" "mmxadd")
-   (set_attr "mode" "DI")])
+ (match_operand:MMXMODEI 1 "register_mmxmem_operand" "%0,0,Yv")
+ (match_operand:MMXMODEI 2 "register_mmxmem_operand" "ym,x,Yv")))]
+  "(TARGET_MMX || TARGET_MMX_WITH_SSE)
+   && ix86_binary_operator_ok (, mode, operands)"
+  "@
+   p\t{%2, %0|%0, %2}
+   p\t{%2, %0|%0, %2}
+   vp\t{%2, %1, %0|%0, %1, %2}"
+  [(set_attr "mmx_isa" "native,x64_noavx,x64_avx")
+   (set_attr "type" "mmxadd,sselog,sselog")
+   (set_attr "mode" "DI,TI,TI")])
 
 ;
 ;;
-- 
2.20.1



[PATCH 11/41] i386: Emulate MMX mmx_eq/mmx_gt3 with SSE

2019-02-16 Thread H.J. Lu
Emulate MMX mmx_eq<mode>3/mmx_gt<mode>3 with SSE.  Only SSE register source
operand is allowed.

PR target/89021
* config/i386/mmx.md (mmx_eq3): Also allow
TARGET_MMX_WITH_SSE.
(*mmx_eq3): Also allow TARGET_MMX_WITH_SSE.  Add SSE
support.
(mmx_gt3): Likewise.
---
 gcc/config/i386/mmx.md | 43 +-
 1 file changed, 26 insertions(+), 17 deletions(-)

diff --git a/gcc/config/i386/mmx.md b/gcc/config/i386/mmx.md
index b3df46dd563..aeebb4f5741 100644
--- a/gcc/config/i386/mmx.md
+++ b/gcc/config/i386/mmx.md
@@ -1017,30 +1017,39 @@
 (define_expand "mmx_eq3"
   [(set (match_operand:MMXMODEI 0 "register_operand")
 (eq:MMXMODEI
- (match_operand:MMXMODEI 1 "nonimmediate_operand")
- (match_operand:MMXMODEI 2 "nonimmediate_operand")))]
-  "TARGET_MMX"
+ (match_operand:MMXMODEI 1 "register_mmxmem_operand")
+ (match_operand:MMXMODEI 2 "register_mmxmem_operand")))]
+  "TARGET_MMX || TARGET_MMX_WITH_SSE"
   "ix86_fixup_binary_operands_no_copy (EQ, mode, operands);")
 
 (define_insn "*mmx_eq3"
-  [(set (match_operand:MMXMODEI 0 "register_operand" "=y")
+  [(set (match_operand:MMXMODEI 0 "register_operand" "=y,x,Yv")
 (eq:MMXMODEI
- (match_operand:MMXMODEI 1 "nonimmediate_operand" "%0")
- (match_operand:MMXMODEI 2 "nonimmediate_operand" "ym")))]
-  "TARGET_MMX && ix86_binary_operator_ok (EQ, mode, operands)"
-  "pcmpeq\t{%2, %0|%0, %2}"
-  [(set_attr "type" "mmxcmp")
-   (set_attr "mode" "DI")])
+ (match_operand:MMXMODEI 1 "register_mmxmem_operand" "%0,0,Yv")
+ (match_operand:MMXMODEI 2 "register_mmxmem_operand" "ym,x,Yv")))]
+  "(TARGET_MMX || TARGET_MMX_WITH_SSE)
+   && ix86_binary_operator_ok (EQ, mode, operands)"
+  "@
+   pcmpeq\t{%2, %0|%0, %2}
+   pcmpeq\t{%2, %0|%0, %2}
+   vpcmpeq\t{%2, %1, %0|%0, %1, %2}"
+  [(set_attr "mmx_isa" "native,x64_noavx,x64_avx")
+   (set_attr "type" "mmxcmp,ssecmp,ssecmp")
+   (set_attr "mode" "DI,TI,TI")])
 
 (define_insn "mmx_gt3"
-  [(set (match_operand:MMXMODEI 0 "register_operand" "=y")
+  [(set (match_operand:MMXMODEI 0 "register_operand" "=y,x,Yv")
 (gt:MMXMODEI
- (match_operand:MMXMODEI 1 "register_operand" "0")
- (match_operand:MMXMODEI 2 "nonimmediate_operand" "ym")))]
-  "TARGET_MMX"
-  "pcmpgt\t{%2, %0|%0, %2}"
-  [(set_attr "type" "mmxcmp")
-   (set_attr "mode" "DI")])
+ (match_operand:MMXMODEI 1 "register_operand" "0,0,Yv")
+ (match_operand:MMXMODEI 2 "register_mmxmem_operand" "ym,x,Yv")))]
+  "TARGET_MMX || TARGET_MMX_WITH_SSE"
+  "@
+   pcmpgt\t{%2, %0|%0, %2}
+   pcmpgt\t{%2, %0|%0, %2}
+   vpcmpgt\t{%2, %1, %0|%0, %1, %2}"
+  [(set_attr "mmx_isa" "native,x64_noavx,x64_avx")
+   (set_attr "type" "mmxcmp,ssecmp,ssecmp")
+   (set_attr "mode" "DI,TI,TI")])
 
 ;
 ;;
-- 
2.20.1



[PATCH 15/41] i386: Emulate MMX sse_cvtpi2ps with SSE

2019-02-16 Thread H.J. Lu
Emulate MMX sse_cvtpi2ps with SSE2 cvtdq2ps, preserving upper 64 bits of
destination XMM register.  Only SSE register source operand is allowed.

PR target/89021
* config/i386/sse.md (sse_cvtpi2ps): Changed to
define_insn_and_split.  Also allow TARGET_MMX_WITH_SSE.  Add
SSE emulation.
---
 gcc/config/i386/sse.md | 64 --
 1 file changed, 56 insertions(+), 8 deletions(-)

diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md
index dd3a3d9ba67..3135ce4eace 100644
--- a/gcc/config/i386/sse.md
+++ b/gcc/config/i386/sse.md
@@ -4569,16 +4569,64 @@
 ;;
 ;
 
-(define_insn "sse_cvtpi2ps"
-  [(set (match_operand:V4SF 0 "register_operand" "=x")
+(define_insn_and_split "sse_cvtpi2ps"
+  [(set (match_operand:V4SF 0 "register_operand" "=x,x,Yv")
(vec_merge:V4SF
  (vec_duplicate:V4SF
-   (float:V2SF (match_operand:V2SI 2 "nonimmediate_operand" "ym")))
- (match_operand:V4SF 1 "register_operand" "0")
- (const_int 3)))]
-  "TARGET_SSE"
-  "cvtpi2ps\t{%2, %0|%0, %2}"
-  [(set_attr "type" "ssecvt")
+   (float:V2SF (match_operand:V2SI 2 "register_mmxmem_operand" 
"ym,x,Yv")))
+ (match_operand:V4SF 1 "register_operand" "0,0,Yv")
+ (const_int 3)))
+   (clobber (match_scratch:V4SF 3 "=X,x,Yv"))]
+  "TARGET_SSE || TARGET_MMX_WITH_SSE"
+  "@
+   cvtpi2ps\t{%2, %0|%0, %2}
+   #
+   #"
+  "TARGET_MMX_WITH_SSE && reload_completed"
+  [(const_int 0)]
+{
+  rtx op2 = lowpart_subreg (V4SImode, operands[2],
+   GET_MODE (operands[2]));
+  /* Generate SSE2 cvtdq2ps.  */
+  rtx insn = gen_floatv4siv4sf2 (operands[3], op2);
+  emit_insn (insn);
+
+  /* Merge operands[3] with operands[0].  */
+  rtx mask, op1;
+  if (TARGET_AVX)
+{
+  mask = gen_rtx_PARALLEL (VOIDmode,
+  gen_rtvec (4, GEN_INT (0), GEN_INT (1),
+ GEN_INT (6), GEN_INT (7)));
+  op1 = gen_rtx_VEC_CONCAT (V8SFmode, operands[3], operands[1]);
+  op2 = gen_rtx_VEC_SELECT (V4SFmode, op1, mask);
+  insn = gen_rtx_SET (operands[0], op2);
+}
+  else
+{
+  /* NB: SSE can only concatenate OP0 and OP3 to OP0.  */
+  mask = gen_rtx_PARALLEL (VOIDmode,
+  gen_rtvec (4, GEN_INT (2), GEN_INT (3),
+ GEN_INT (4), GEN_INT (5)));
+  op1 = gen_rtx_VEC_CONCAT (V8SFmode, operands[0], operands[3]);
+  op2 = gen_rtx_VEC_SELECT (V4SFmode, op1, mask);
+  insn = gen_rtx_SET (operands[0], op2);
+  emit_insn (insn);
+
+  /* Swap bits 0:63 with bits 64:127.  */
+  mask = gen_rtx_PARALLEL (VOIDmode,
+  gen_rtvec (4, GEN_INT (2), GEN_INT (3),
+ GEN_INT (0), GEN_INT (1)));
+  rtx dest = lowpart_subreg (V4SImode, operands[0],
+GET_MODE (operands[0]));
+  op1 = gen_rtx_VEC_SELECT (V4SImode, dest, mask);
+  insn = gen_rtx_SET (dest, op1);
+}
+  emit_insn (insn);
+  DONE;
+}
+  [(set_attr "mmx_isa" "native,x64_noavx,x64_avx")
+   (set_attr "type" "ssecvt")
(set_attr "mode" "V4SF")])
 
 (define_insn "sse_cvtps2pi"
-- 
2.20.1



[PATCH 16/41] i386: Emulate MMX mmx_pextrw with SSE

2019-02-16 Thread H.J. Lu
Emulate MMX mmx_pextrw with SSE.  Only SSE register source operand is
allowed.

PR target/89021
* config/i386/mmx.md (mmx_pextrw): Add SSE emulation.
---
 gcc/config/i386/mmx.md | 18 +++---
 1 file changed, 11 insertions(+), 7 deletions(-)

diff --git a/gcc/config/i386/mmx.md b/gcc/config/i386/mmx.md
index 09e78ac5f74..28725f48282 100644
--- a/gcc/config/i386/mmx.md
+++ b/gcc/config/i386/mmx.md
@@ -1310,16 +1310,20 @@
(set_attr "mode" "DI")])
 
 (define_insn "mmx_pextrw"
-  [(set (match_operand:SI 0 "register_operand" "=r")
+  [(set (match_operand:SI 0 "register_operand" "=r,r")
 (zero_extend:SI
  (vec_select:HI
-   (match_operand:V4HI 1 "register_operand" "y")
-   (parallel [(match_operand:SI 2 "const_0_to_3_operand" "n")]]
-  "TARGET_SSE || TARGET_3DNOW_A"
-  "pextrw\t{%2, %1, %0|%0, %1, %2}"
-  [(set_attr "type" "mmxcvt")
+   (match_operand:V4HI 1 "register_operand" "y,Yv")
+   (parallel [(match_operand:SI 2 "const_0_to_3_operand" "n,n")]]
+  "(TARGET_MMX || TARGET_MMX_WITH_SSE)
+   && (TARGET_SSE || TARGET_3DNOW_A)"
+  "@
+   pextrw\t{%2, %1, %0|%0, %1, %2}
+   %vpextrw\t{%2, %1, %0|%0, %1, %2}"
+  [(set_attr "mmx_isa" "native,x64")
+   (set_attr "type" "mmxcvt,sselog1")
(set_attr "length_immediate" "1")
-   (set_attr "mode" "DI")])
+   (set_attr "mode" "DI,TI")])
 
 (define_expand "mmx_pshufw"
   [(match_operand:V4HI 0 "register_operand")
-- 
2.20.1



[PATCH 12/41] i386: Emulate MMX vec_dupv2si with SSE

2019-02-16 Thread H.J. Lu
Emulate MMX vec_dupv2si with SSE.  Add the "Yw" constraint to allow
broadcast from integer register for AVX512BW with TARGET_AVX512VL.
Only SSE register source operand is allowed.

PR target/89021
* config/i386/constraints.md (Yw): New constraint.
* config/i386/mmx.md (*vec_dupv2si): Changed to
define_insn_and_split and also allow TARGET_MMX_WITH_SSE to
support SSE emulation.
---
 gcc/config/i386/constraints.md |  6 ++
 gcc/config/i386/mmx.md | 24 +---
 2 files changed, 23 insertions(+), 7 deletions(-)

diff --git a/gcc/config/i386/constraints.md b/gcc/config/i386/constraints.md
index 16075b4acf3..c546b20d9dc 100644
--- a/gcc/config/i386/constraints.md
+++ b/gcc/config/i386/constraints.md
@@ -110,6 +110,8 @@
 ;;  v  any EVEX encodable SSE register for AVX512VL target,
 ;; otherwise any SSE register
 ;;  h  EVEX encodable SSE register with number factor of four
+;;  w  any EVEX encodable SSE register for AVX512BW with TARGET_AVX512VL
+;; target.
 
 (define_register_constraint "Yz" "TARGET_SSE ? SSE_FIRST_REG : NO_REGS"
  "First SSE register (@code{%xmm0}).")
@@ -146,6 +148,10 @@
  "TARGET_AVX512VL ? ALL_SSE_REGS : TARGET_SSE ? SSE_REGS : NO_REGS"
  "@internal For AVX512VL, any EVEX encodable SSE register 
(@code{%xmm0-%xmm31}), otherwise any SSE register.")
 
+(define_register_constraint "Yw"
+ "TARGET_AVX512BW && TARGET_AVX512VL ? ALL_SSE_REGS : NO_REGS"
+ "@internal Any EVEX encodable SSE register (@code{%xmm0-%xmm31}) for AVX512BW 
with TARGET_AVX512VL target.")
+
 ;; We use the B prefix to denote any number of internal operands:
 ;;  f  FLAGS_REG
 ;;  g  GOT memory operand.
diff --git a/gcc/config/i386/mmx.md b/gcc/config/i386/mmx.md
index aeebb4f5741..b441f36dfc6 100644
--- a/gcc/config/i386/mmx.md
+++ b/gcc/config/i386/mmx.md
@@ -1381,14 +1381,24 @@
(set_attr "length_immediate" "1")
(set_attr "mode" "DI")])
 
-(define_insn "*vec_dupv2si"
-  [(set (match_operand:V2SI 0 "register_operand" "=y")
+(define_insn_and_split "*vec_dupv2si"
+  [(set (match_operand:V2SI 0 "register_operand" "=y,x,Yv,Yw")
(vec_duplicate:V2SI
- (match_operand:SI 1 "register_operand" "0")))]
-  "TARGET_MMX"
-  "punpckldq\t%0, %0"
-  [(set_attr "type" "mmxcvt")
-   (set_attr "mode" "DI")])
+ (match_operand:SI 1 "register_operand" "0,0,Yv,r")))]
+  "TARGET_MMX || TARGET_MMX_WITH_SSE"
+  "@
+   punpckldq\t%0, %0
+   #
+   #
+   #"
+  "TARGET_MMX_WITH_SSE && reload_completed"
+  [(set (match_dup 0)
+   (vec_duplicate:V4SI (match_dup 1)))]
+  "operands[0] = lowpart_subreg (V4SImode, operands[0],
+GET_MODE (operands[0]));"
+  [(set_attr "mmx_isa" "native,x64_noavx,x64_avx,x64_avx")
+   (set_attr "type" "mmxcvt,ssemov,ssemov,ssemov")
+   (set_attr "mode" "DI,TI,TI,TI")])
 
 (define_insn "*mmx_concatv2si"
   [(set (match_operand:V2SI 0 "register_operand" "=y,y")
-- 
2.20.1



[PATCH 14/41] i386: Emulate MMX sse_cvtps2pi/sse_cvttps2pi with SSE

2019-02-16 Thread H.J. Lu
Emulate MMX sse_cvtps2pi/sse_cvttps2pi with SSE.

PR target/89021
* config/i386/sse.md (sse_cvtps2pi): Add SSE emulation.
(sse_cvttps2pi): Likewise.
---
 gcc/config/i386/sse.md | 30 ++
 1 file changed, 18 insertions(+), 12 deletions(-)

diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md
index 30bf7e23122..dd3a3d9ba67 100644
--- a/gcc/config/i386/sse.md
+++ b/gcc/config/i386/sse.md
@@ -4582,26 +4582,32 @@
(set_attr "mode" "V4SF")])
 
 (define_insn "sse_cvtps2pi"
-  [(set (match_operand:V2SI 0 "register_operand" "=y")
+  [(set (match_operand:V2SI 0 "register_operand" "=y,Yv")
(vec_select:V2SI
- (unspec:V4SI [(match_operand:V4SF 1 "nonimmediate_operand" "xm")]
+ (unspec:V4SI [(match_operand:V4SF 1 "register_mmxmem_operand" 
"xm,YvBm")]
   UNSPEC_FIX_NOTRUNC)
  (parallel [(const_int 0) (const_int 1)])))]
-  "TARGET_SSE"
-  "cvtps2pi\t{%1, %0|%0, %q1}"
-  [(set_attr "type" "ssecvt")
-   (set_attr "unit" "mmx")
+  "(TARGET_MMX || TARGET_MMX_WITH_SSE) && TARGET_SSE"
+  "@
+   cvtps2pi\t{%1, %0|%0, %q1}
+   %vcvtps2dq\t{%1, %0|%0, %1}"
+  [(set_attr "mmx_isa" "native,x64")
+   (set_attr "type" "ssecvt")
+   (set_attr "unit" "mmx,*")
(set_attr "mode" "DI")])
 
 (define_insn "sse_cvttps2pi"
-  [(set (match_operand:V2SI 0 "register_operand" "=y")
+  [(set (match_operand:V2SI 0 "register_operand" "=y,Yv")
(vec_select:V2SI
- (fix:V4SI (match_operand:V4SF 1 "nonimmediate_operand" "xm"))
+ (fix:V4SI (match_operand:V4SF 1 "register_mmxmem_operand" "xm,YvBm"))
  (parallel [(const_int 0) (const_int 1)])))]
-  "TARGET_SSE"
-  "cvttps2pi\t{%1, %0|%0, %q1}"
-  [(set_attr "type" "ssecvt")
-   (set_attr "unit" "mmx")
+  "(TARGET_MMX || TARGET_MMX_WITH_SSE) && TARGET_SSE"
+  "@
+   cvttps2pi\t{%1, %0|%0, %q1}
+   %vcvttps2dq\t{%1, %0|%0, %1}"
+  [(set_attr "mmx_isa" "native,x64")
+   (set_attr "type" "ssecvt")
+   (set_attr "unit" "mmx,*")
(set_attr "prefix_rep" "0")
(set_attr "mode" "SF")])
 
-- 
2.20.1



[PATCH 17/41] i386: Emulate MMX mmx_pinsrw with SSE

2019-02-16 Thread H.J. Lu
Emulate MMX mmx_pinsrw with SSE.  Only SSE register destination operand
is allowed.

PR target/89021
* config/i386/mmx.md (mmx_pinsrw): Also check TARGET_MMX and
TARGET_MMX_WITH_SSE.
(*mmx_pinsrw): Add SSE emulation.
---
 gcc/config/i386/mmx.md | 33 +++--
 1 file changed, 23 insertions(+), 10 deletions(-)

diff --git a/gcc/config/i386/mmx.md b/gcc/config/i386/mmx.md
index 28725f48282..dea2be1d8e2 100644
--- a/gcc/config/i386/mmx.md
+++ b/gcc/config/i386/mmx.md
@@ -1282,32 +1282,45 @@
 (match_operand:SI 2 "nonimmediate_operand"))
  (match_operand:V4HI 1 "register_operand")
   (match_operand:SI 3 "const_0_to_3_operand")))]
-  "TARGET_SSE || TARGET_3DNOW_A"
+  "(TARGET_MMX || TARGET_MMX_WITH_SSE)
+   && (TARGET_SSE || TARGET_3DNOW_A)"
 {
   operands[2] = gen_lowpart (HImode, operands[2]);
   operands[3] = GEN_INT (1 << INTVAL (operands[3]));
 })
 
 (define_insn "*mmx_pinsrw"
-  [(set (match_operand:V4HI 0 "register_operand" "=y")
+  [(set (match_operand:V4HI 0 "register_operand" "=y,x,Yv")
 (vec_merge:V4HI
   (vec_duplicate:V4HI
-(match_operand:HI 2 "nonimmediate_operand" "rm"))
- (match_operand:V4HI 1 "register_operand" "0")
+(match_operand:HI 2 "nonimmediate_operand" "rm,rm,rm"))
+ (match_operand:V4HI 1 "register_operand" "0,0,Yv")
   (match_operand:SI 3 "const_int_operand")))]
-  "(TARGET_SSE || TARGET_3DNOW_A)
+  "(TARGET_MMX || TARGET_MMX_WITH_SSE)
+   && (TARGET_SSE || TARGET_3DNOW_A)
&& ((unsigned) exact_log2 (INTVAL (operands[3]))
< GET_MODE_NUNITS (V4HImode))"
 {
   operands[3] = GEN_INT (exact_log2 (INTVAL (operands[3])));
-  if (MEM_P (operands[2]))
-return "pinsrw\t{%3, %2, %0|%0, %2, %3}";
+  if (TARGET_MMX_WITH_SSE && TARGET_AVX)
+{
+  if (MEM_P (operands[2]))
+   return "vpinsrw\t{%3, %2, %1, %0|%0, %1, %2, %3}";
+  else
+   return "vpinsrw\t{%3, %k2, %1, %0|%0, %1, %k2, %3}";
+}
   else
-return "pinsrw\t{%3, %k2, %0|%0, %k2, %3}";
+{
+  if (MEM_P (operands[2]))
+   return "pinsrw\t{%3, %2, %0|%0, %2, %3}";
+  else
+   return "pinsrw\t{%3, %k2, %0|%0, %k2, %3}";
+}
 }
-  [(set_attr "type" "mmxcvt")
+  [(set_attr "mmx_isa" "native,x64_noavx,x64_avx")
+   (set_attr "type" "mmxcvt,sselog,sselog")
(set_attr "length_immediate" "1")
-   (set_attr "mode" "DI")])
+   (set_attr "mode" "DI,TI,TI")])
 
 (define_insn "mmx_pextrw"
   [(set (match_operand:SI 0 "register_operand" "=r,r")
-- 
2.20.1



[PATCH 33/41] i386: Emulate MMX ssse3_psign3 with SSE

2019-02-16 Thread H.J. Lu
Emulate MMX ssse3_psign<mode>3 with SSE.  Only SSE register source operand
is allowed.

PR target/89021
* config/i386/sse.md (ssse3_psign3): Add SSE emulation.
---
 gcc/config/i386/sse.md | 18 +++---
 1 file changed, 11 insertions(+), 7 deletions(-)

diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md
index 79b35d95424..1d90af0a4b0 100644
--- a/gcc/config/i386/sse.md
+++ b/gcc/config/i386/sse.md
@@ -15786,17 +15786,21 @@
(set_attr "mode" "")])
 
 (define_insn "ssse3_psign3"
-  [(set (match_operand:MMXMODEI 0 "register_operand" "=y")
+  [(set (match_operand:MMXMODEI 0 "register_operand" "=y,x,Yv")
(unspec:MMXMODEI
- [(match_operand:MMXMODEI 1 "register_operand" "0")
-  (match_operand:MMXMODEI 2 "nonimmediate_operand" "ym")]
+ [(match_operand:MMXMODEI 1 "register_operand" "0,0,Yv")
+  (match_operand:MMXMODEI 2 "register_mmxmem_operand" "ym,x,Yv")]
  UNSPEC_PSIGN))]
-  "TARGET_SSSE3"
-  "psign\t{%2, %0|%0, %2}";
-  [(set_attr "type" "sselog1")
+  "(TARGET_MMX || TARGET_MMX_WITH_SSE) && TARGET_SSSE3"
+  "@
+   psign\t{%2, %0|%0, %2}
+   psign\t{%2, %0|%0, %2}
+   vpsign\t{%2, %1, %0|%0, %1, %2}"
+  [(set_attr "mmx_isa" "native,x64_noavx,x64_avx")
+   (set_attr "type" "sselog1")
(set_attr "prefix_extra" "1")
(set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
-   (set_attr "mode" "DI")])
+   (set_attr "mode" "DI,TI,TI")])
 
 (define_insn "_palignr_mask"
   [(set (match_operand:VI1_AVX512 0 "register_operand" "=v")
-- 
2.20.1



[PATCH 40/41] i386: Enable TM MMX intrinsics with SSE2

2019-02-16 Thread H.J. Lu
This patch enables TM MMX intrinsics with SSE2 when MMX is disabled.

PR target/89021
* config/i386/i386.c (bdesc_tm): Enable MMX intrinsics with
SSE2.
---
 gcc/config/i386/i386.c | 16 
 1 file changed, 8 insertions(+), 8 deletions(-)

diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c
index 073a2534d1f..319a98f824a 100644
--- a/gcc/config/i386/i386.c
+++ b/gcc/config/i386/i386.c
@@ -31065,13 +31065,13 @@ static const struct builtin_description 
bdesc_##kind[] =  \
we're lazy.  Add casts to make them fit.  */
 static const struct builtin_description bdesc_tm[] =
 {
-  { OPTION_MASK_ISA_MMX, 0, CODE_FOR_nothing, "__builtin__ITM_WM64", (enum 
ix86_builtins) BUILT_IN_TM_STORE_M64, UNKNOWN, VOID_FTYPE_PV2SI_V2SI },
-  { OPTION_MASK_ISA_MMX, 0, CODE_FOR_nothing, "__builtin__ITM_WaRM64", (enum 
ix86_builtins) BUILT_IN_TM_STORE_WAR_M64, UNKNOWN, VOID_FTYPE_PV2SI_V2SI },
-  { OPTION_MASK_ISA_MMX, 0, CODE_FOR_nothing, "__builtin__ITM_WaWM64", (enum 
ix86_builtins) BUILT_IN_TM_STORE_WAW_M64, UNKNOWN, VOID_FTYPE_PV2SI_V2SI },
-  { OPTION_MASK_ISA_MMX, 0, CODE_FOR_nothing, "__builtin__ITM_RM64", (enum 
ix86_builtins) BUILT_IN_TM_LOAD_M64, UNKNOWN, V2SI_FTYPE_PCV2SI },
-  { OPTION_MASK_ISA_MMX, 0, CODE_FOR_nothing, "__builtin__ITM_RaRM64", (enum 
ix86_builtins) BUILT_IN_TM_LOAD_RAR_M64, UNKNOWN, V2SI_FTYPE_PCV2SI },
-  { OPTION_MASK_ISA_MMX, 0, CODE_FOR_nothing, "__builtin__ITM_RaWM64", (enum 
ix86_builtins) BUILT_IN_TM_LOAD_RAW_M64, UNKNOWN, V2SI_FTYPE_PCV2SI },
-  { OPTION_MASK_ISA_MMX, 0, CODE_FOR_nothing, "__builtin__ITM_RfWM64", (enum 
ix86_builtins) BUILT_IN_TM_LOAD_RFW_M64, UNKNOWN, V2SI_FTYPE_PCV2SI },
+  { OPTION_MASK_ISA_MMX | OPTION_MASK_ISA_SSE2, 0, CODE_FOR_nothing, 
"__builtin__ITM_WM64", (enum ix86_builtins) BUILT_IN_TM_STORE_M64, UNKNOWN, 
VOID_FTYPE_PV2SI_V2SI },
+  { OPTION_MASK_ISA_MMX | OPTION_MASK_ISA_SSE2, 0, CODE_FOR_nothing, 
"__builtin__ITM_WaRM64", (enum ix86_builtins) BUILT_IN_TM_STORE_WAR_M64, 
UNKNOWN, VOID_FTYPE_PV2SI_V2SI },
+  { OPTION_MASK_ISA_MMX | OPTION_MASK_ISA_SSE2, 0, CODE_FOR_nothing, 
"__builtin__ITM_WaWM64", (enum ix86_builtins) BUILT_IN_TM_STORE_WAW_M64, 
UNKNOWN, VOID_FTYPE_PV2SI_V2SI },
+  { OPTION_MASK_ISA_MMX | OPTION_MASK_ISA_SSE2, 0, CODE_FOR_nothing, 
"__builtin__ITM_RM64", (enum ix86_builtins) BUILT_IN_TM_LOAD_M64, UNKNOWN, 
V2SI_FTYPE_PCV2SI },
+  { OPTION_MASK_ISA_MMX | OPTION_MASK_ISA_SSE2, 0, CODE_FOR_nothing, 
"__builtin__ITM_RaRM64", (enum ix86_builtins) BUILT_IN_TM_LOAD_RAR_M64, 
UNKNOWN, V2SI_FTYPE_PCV2SI },
+  { OPTION_MASK_ISA_MMX | OPTION_MASK_ISA_SSE2, 0, CODE_FOR_nothing, 
"__builtin__ITM_RaWM64", (enum ix86_builtins) BUILT_IN_TM_LOAD_RAW_M64, 
UNKNOWN, V2SI_FTYPE_PCV2SI },
+  { OPTION_MASK_ISA_MMX | OPTION_MASK_ISA_SSE2, 0, CODE_FOR_nothing, 
"__builtin__ITM_RfWM64", (enum ix86_builtins) BUILT_IN_TM_LOAD_RFW_M64, 
UNKNOWN, V2SI_FTYPE_PCV2SI },
 
   { OPTION_MASK_ISA_SSE, 0, CODE_FOR_nothing, "__builtin__ITM_WM128", (enum 
ix86_builtins) BUILT_IN_TM_STORE_M128, UNKNOWN, VOID_FTYPE_PV4SF_V4SF },
   { OPTION_MASK_ISA_SSE, 0, CODE_FOR_nothing, "__builtin__ITM_WaRM128", (enum 
ix86_builtins) BUILT_IN_TM_STORE_WAR_M128, UNKNOWN, VOID_FTYPE_PV4SF_V4SF },
@@ -31089,7 +31089,7 @@ static const struct builtin_description bdesc_tm[] =
   { OPTION_MASK_ISA_AVX, 0, CODE_FOR_nothing, "__builtin__ITM_RaWM256", (enum 
ix86_builtins) BUILT_IN_TM_LOAD_RAW_M256, UNKNOWN, V8SF_FTYPE_PCV8SF },
   { OPTION_MASK_ISA_AVX, 0, CODE_FOR_nothing, "__builtin__ITM_RfWM256", (enum 
ix86_builtins) BUILT_IN_TM_LOAD_RFW_M256, UNKNOWN, V8SF_FTYPE_PCV8SF },
 
-  { OPTION_MASK_ISA_MMX, 0, CODE_FOR_nothing, "__builtin__ITM_LM64", (enum 
ix86_builtins) BUILT_IN_TM_LOG_M64, UNKNOWN, VOID_FTYPE_PCVOID },
+  { OPTION_MASK_ISA_MMX | OPTION_MASK_ISA_SSE2, 0, CODE_FOR_nothing, 
"__builtin__ITM_LM64", (enum ix86_builtins) BUILT_IN_TM_LOG_M64, UNKNOWN, 
VOID_FTYPE_PCVOID },
   { OPTION_MASK_ISA_SSE, 0, CODE_FOR_nothing, "__builtin__ITM_LM128", (enum 
ix86_builtins) BUILT_IN_TM_LOG_M128, UNKNOWN, VOID_FTYPE_PCVOID },
   { OPTION_MASK_ISA_AVX, 0, CODE_FOR_nothing, "__builtin__ITM_LM256", (enum 
ix86_builtins) BUILT_IN_TM_LOG_M256, UNKNOWN, VOID_FTYPE_PCVOID },
 };
-- 
2.20.1



[PATCH 13/41] i386: Emulate MMX pshufw with SSE

2019-02-16 Thread H.J. Lu
Emulate MMX pshufw with SSE.  Only SSE register source operand is allowed.

PR target/89021
* config/i386/mmx.md (mmx_pshufw): Also check TARGET_MMX and
TARGET_MMX_WITH_SSE.
(mmx_pshufw_1): Add SSE emulation.
(*vec_dupv4hi): Changed to define_insn_and_split and also allow
TARGET_MMX_WITH_SSE to support SSE emulation.
---
 gcc/config/i386/mmx.md | 81 +-
 1 file changed, 65 insertions(+), 16 deletions(-)

diff --git a/gcc/config/i386/mmx.md b/gcc/config/i386/mmx.md
index b441f36dfc6..09e78ac5f74 100644
--- a/gcc/config/i386/mmx.md
+++ b/gcc/config/i386/mmx.md
@@ -1323,9 +1323,10 @@
 
 (define_expand "mmx_pshufw"
   [(match_operand:V4HI 0 "register_operand")
-   (match_operand:V4HI 1 "nonimmediate_operand")
+   (match_operand:V4HI 1 "register_mmxmem_operand")
(match_operand:SI 2 "const_int_operand")]
-  "TARGET_SSE || TARGET_3DNOW_A"
+  "(TARGET_MMX || TARGET_MMX_WITH_SSE)
+   && (TARGET_SSE || TARGET_3DNOW_A)"
 {
   int mask = INTVAL (operands[2]);
   emit_insn (gen_mmx_pshufw_1 (operands[0], operands[1],
@@ -1337,14 +1338,15 @@
 })
 
 (define_insn "mmx_pshufw_1"
-  [(set (match_operand:V4HI 0 "register_operand" "=y")
+  [(set (match_operand:V4HI 0 "register_operand" "=y,Yv")
 (vec_select:V4HI
-  (match_operand:V4HI 1 "nonimmediate_operand" "ym")
+  (match_operand:V4HI 1 "register_mmxmem_operand" "ym,Yv")
   (parallel [(match_operand 2 "const_0_to_3_operand")
  (match_operand 3 "const_0_to_3_operand")
  (match_operand 4 "const_0_to_3_operand")
  (match_operand 5 "const_0_to_3_operand")])))]
-  "TARGET_SSE || TARGET_3DNOW_A"
+  "(TARGET_MMX || TARGET_MMX_WITH_SSE)
+   && (TARGET_SSE || TARGET_3DNOW_A)"
 {
   int mask = 0;
   mask |= INTVAL (operands[2]) << 0;
@@ -1353,11 +1355,20 @@
   mask |= INTVAL (operands[5]) << 6;
   operands[2] = GEN_INT (mask);
 
-  return "pshufw\t{%2, %1, %0|%0, %1, %2}";
+  switch (which_alternative)
+{
+case 0:
+  return "pshufw\t{%2, %1, %0|%0, %1, %2}";
+case 1:
+  return "%vpshuflw\t{%2, %1, %0|%0, %1, %2}";
+default:
+  gcc_unreachable ();
+}
 }
-  [(set_attr "type" "mmxcvt")
+  [(set_attr "mmx_isa" "native,x64")
+   (set_attr "type" "mmxcvt,sselog")
(set_attr "length_immediate" "1")
-   (set_attr "mode" "DI")])
+   (set_attr "mode" "DI,TI")])
 
 (define_insn "mmx_pswapdv2si2"
   [(set (match_operand:V2SI 0 "register_operand" "=y")
@@ -1370,16 +1381,54 @@
(set_attr "prefix_extra" "1")
(set_attr "mode" "DI")])
 
-(define_insn "*vec_dupv4hi"
-  [(set (match_operand:V4HI 0 "register_operand" "=y")
+(define_insn_and_split "*vec_dupv4hi"
+  [(set (match_operand:V4HI 0 "register_operand" "=y,Yv,Yw")
(vec_duplicate:V4HI
  (truncate:HI
-   (match_operand:SI 1 "register_operand" "0"]
-  "TARGET_SSE || TARGET_3DNOW_A"
-  "pshufw\t{$0, %0, %0|%0, %0, 0}"
-  [(set_attr "type" "mmxcvt")
-   (set_attr "length_immediate" "1")
-   (set_attr "mode" "DI")])
+   (match_operand:SI 1 "register_operand" "0,Yv,r"]
+  "(TARGET_MMX || TARGET_MMX_WITH_SSE)
+   && (TARGET_SSE || TARGET_3DNOW_A)"
+  "@
+   pshufw\t{$0, %0, %0|%0, %0, 0}
+   #
+   #"
+  "TARGET_MMX_WITH_SSE && reload_completed"
+  [(const_int 0)]
+{
+  rtx op;
+  operands[0] = lowpart_subreg (V8HImode, operands[0],
+   GET_MODE (operands[0]));
+  if (TARGET_AVX2)
+{
+  operands[1] = lowpart_subreg (HImode, operands[1],
+   GET_MODE (operands[1]));
+  op = gen_rtx_VEC_DUPLICATE (V8HImode, operands[1]);
+}
+  else
+{
+  operands[1] = lowpart_subreg (V8HImode, operands[1],
+   GET_MODE (operands[1]));
+  rtx mask = gen_rtx_PARALLEL (VOIDmode,
+  gen_rtvec (8,
+ GEN_INT (0),
+ GEN_INT (0),
+ GEN_INT (0),
+ GEN_INT (0),
+ GEN_INT (4),
+ GEN_INT (5),
+ GEN_INT (6),
+ GEN_INT (7)));
+
+  op = gen_rtx_VEC_SELECT (V8HImode, operands[1], mask);
+}
+  rtx insn = gen_rtx_SET (operands[0], op);
+  emit_insn (insn);
+  DONE;
+}
+  [(set_attr "mmx_isa" "native,x64,x64_avx")
+   (set_attr "type" "mmxcvt,sselog1,ssemov")
+   (set_attr "length_immediate" "1,1,0")
+   (set_attr "mode" "DI,TI,TI")])
 
 (define_insn_and_split "*vec_dupv2si"
   [(set (match_operand:V2SI 0 "register_operand" "=y,x,Yv,Yw")
-- 
2.20.1



[PATCH 27/41] i386: Make _mm_empty () as NOP for TARGET_MMX_WITH_SSE

2019-02-16 Thread H.J. Lu
With SSE emulation of MMX intrinsics, we should make _mm_empty () a NOP
for TARGET_MMX_WITH_SSE.

PR target/89021
* config/i386/mmx.md (mmx_): Renamed to ...
(*mmx_): This.
(mmx_): New expander.
---
 gcc/config/i386/mmx.md | 30 +-
 1 file changed, 29 insertions(+), 1 deletion(-)

diff --git a/gcc/config/i386/mmx.md b/gcc/config/i386/mmx.md
index f27513f7f2c..8b5fe3948e2 100644
--- a/gcc/config/i386/mmx.md
+++ b/gcc/config/i386/mmx.md
@@ -1849,7 +1849,35 @@
   [(UNSPECV_EMMS "emms")
(UNSPECV_FEMMS "femms")])
 
-(define_insn "mmx_"
+(define_expand "mmx_"
+  [(parallel
+[(unspec_volatile [(const_int 0)] EMMS)
+  (clobber (reg:XF ST0_REG))
+  (clobber (reg:XF ST1_REG))
+  (clobber (reg:XF ST2_REG))
+  (clobber (reg:XF ST3_REG))
+  (clobber (reg:XF ST4_REG))
+  (clobber (reg:XF ST5_REG))
+  (clobber (reg:XF ST6_REG))
+  (clobber (reg:XF ST7_REG))
+  (clobber (reg:DI MM0_REG))
+  (clobber (reg:DI MM1_REG))
+  (clobber (reg:DI MM2_REG))
+  (clobber (reg:DI MM3_REG))
+  (clobber (reg:DI MM4_REG))
+  (clobber (reg:DI MM5_REG))
+  (clobber (reg:DI MM6_REG))
+  (clobber (reg:DI MM7_REG))])]
+  "TARGET_MMX || TARGET_MMX_WITH_SSE"
+{
+   if (TARGET_MMX_WITH_SSE)
+ {
+   emit_insn (gen_nop ());
+   DONE;
+ }
+})
+
+(define_insn "*mmx_"
   [(unspec_volatile [(const_int 0)] EMMS)
(clobber (reg:XF ST0_REG))
(clobber (reg:XF ST1_REG))
-- 
2.20.1



[PATCH 31/41] i386: Emulate MMX ssse3_pmulhrswv4hi3 with SSE

2019-02-16 Thread H.J. Lu
Emulate MMX ssse3_pmulhrswv4hi3 with SSE.  Only an SSE register source
operand is allowed.

PR target/89021
* config/i386/sse.md (ssse3_pmulhrswv4hi3): Require TARGET_MMX
or TARGET_MMX_WITH_SSE.
(*ssse3_pmulhrswv4hi3): Add SSE emulation.
---
 gcc/config/i386/sse.md | 26 --
 1 file changed, 16 insertions(+), 10 deletions(-)

diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md
index e8d9bec9766..b08a577d1e4 100644
--- a/gcc/config/i386/sse.md
+++ b/gcc/config/i386/sse.md
@@ -15670,38 +15670,44 @@
  (lshiftrt:V4SI
(mult:V4SI
  (sign_extend:V4SI
-   (match_operand:V4HI 1 "nonimmediate_operand"))
+   (match_operand:V4HI 1 "register_mmxmem_operand"))
  (sign_extend:V4SI
-   (match_operand:V4HI 2 "nonimmediate_operand")))
+   (match_operand:V4HI 2 "register_mmxmem_operand")))
(const_int 14))
  (match_dup 3))
(const_int 1]
-  "TARGET_SSSE3"
+  "(TARGET_MMX || TARGET_MMX_WITH_SSE) && TARGET_SSSE3"
 {
   operands[3] = CONST1_RTX(V4HImode);
   ix86_fixup_binary_operands_no_copy (MULT, V4HImode, operands);
 })
 
 (define_insn "*ssse3_pmulhrswv4hi3"
-  [(set (match_operand:V4HI 0 "register_operand" "=y")
+  [(set (match_operand:V4HI 0 "register_operand" "=y,x,Yv")
(truncate:V4HI
  (lshiftrt:V4SI
(plus:V4SI
  (lshiftrt:V4SI
(mult:V4SI
  (sign_extend:V4SI
-   (match_operand:V4HI 1 "nonimmediate_operand" "%0"))
+   (match_operand:V4HI 1 "register_mmxmem_operand" "%0,0,Yv"))
  (sign_extend:V4SI
-   (match_operand:V4HI 2 "nonimmediate_operand" "ym")))
+   (match_operand:V4HI 2 "register_mmxmem_operand" "ym,x,Yv")))
(const_int 14))
  (match_operand:V4HI 3 "const1_operand"))
(const_int 1]
-  "TARGET_SSSE3 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
-  "pmulhrsw\t{%2, %0|%0, %2}"
-  [(set_attr "type" "sseimul")
+  "(TARGET_MMX || TARGET_MMX_WITH_SSE)
+   && TARGET_SSSE3
+   && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
+  "@
+   pmulhrsw\t{%2, %0|%0, %2}
+   pmulhrsw\t{%2, %0|%0, %2}
+   vpmulhrsw\t{%2, %1, %0|%0, %1, %2}"
+  [(set_attr "mmx_isa" "native,x64_noavx,x64_avx")
+   (set_attr "type" "sseimul")
(set_attr "prefix_extra" "1")
(set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
-   (set_attr "mode" "DI")])
+   (set_attr "mode" "DI,TI,TI")])
 
 (define_insn "_pshufb3"
   [(set (match_operand:VI1_AVX512 0 "register_operand" "=x,x,v")
-- 
2.20.1



[PATCH 20/41] i386: Emulate MMX mmx_umulv4hi3_highpart with SSE

2019-02-16 Thread H.J. Lu
Emulate MMX mmx_umulv4hi3_highpart with SSE.  Only an SSE register source
operand is allowed.

PR target/89021
* config/i386/mmx.md (mmx_umulv4hi3_highpart): Also check
TARGET_MMX and TARGET_MMX_WITH_SSE.
(*mmx_umulv4hi3_highpart): Add SSE emulation.
---
 gcc/config/i386/mmx.md | 26 --
 1 file changed, 16 insertions(+), 10 deletions(-)

diff --git a/gcc/config/i386/mmx.md b/gcc/config/i386/mmx.md
index 5ae04de205d..5a342256cbc 100644
--- a/gcc/config/i386/mmx.md
+++ b/gcc/config/i386/mmx.md
@@ -781,28 +781,34 @@
  (lshiftrt:V4SI
(mult:V4SI
  (zero_extend:V4SI
-   (match_operand:V4HI 1 "nonimmediate_operand"))
+   (match_operand:V4HI 1 "register_mmxmem_operand"))
  (zero_extend:V4SI
-   (match_operand:V4HI 2 "nonimmediate_operand")))
+   (match_operand:V4HI 2 "register_mmxmem_operand")))
(const_int 16]
-  "TARGET_SSE || TARGET_3DNOW_A"
+  "(TARGET_MMX || TARGET_MMX_WITH_SSE)
+   && (TARGET_SSE || TARGET_3DNOW_A)"
   "ix86_fixup_binary_operands_no_copy (MULT, V4HImode, operands);")
 
 (define_insn "*mmx_umulv4hi3_highpart"
-  [(set (match_operand:V4HI 0 "register_operand" "=y")
+  [(set (match_operand:V4HI 0 "register_operand" "=y,x,Yv")
(truncate:V4HI
  (lshiftrt:V4SI
(mult:V4SI
  (zero_extend:V4SI
-   (match_operand:V4HI 1 "nonimmediate_operand" "%0"))
+   (match_operand:V4HI 1 "register_mmxmem_operand" "%0,0,Yv"))
  (zero_extend:V4SI
-   (match_operand:V4HI 2 "nonimmediate_operand" "ym")))
+   (match_operand:V4HI 2 "register_mmxmem_operand" "ym,x,Yv")))
  (const_int 16]
-  "(TARGET_SSE || TARGET_3DNOW_A)
+  "(TARGET_MMX || TARGET_MMX_WITH_SSE)
+   && (TARGET_SSE || TARGET_3DNOW_A)
&& ix86_binary_operator_ok (MULT, V4HImode, operands)"
-  "pmulhuw\t{%2, %0|%0, %2}"
-  [(set_attr "type" "mmxmul")
-   (set_attr "mode" "DI")])
+  "@
+   pmulhuw\t{%2, %0|%0, %2}
+   pmulhuw\t{%2, %0|%0, %2}
+   vpmulhuw\t{%2, %1, %0|%0, %1, %2}"
+  [(set_attr "mmx_isa" "native,x64_noavx,x64_avx")
+   (set_attr "type" "mmxmul,ssemul,ssemul")
+   (set_attr "mode" "DI,TI,TI")])
 
 (define_expand "mmx_pmaddwd"
   [(set (match_operand:V2SI 0 "register_operand")
-- 
2.20.1



[PATCH 19/41] i386: Emulate MMX mmx_pmovmskb with SSE

2019-02-16 Thread H.J. Lu
Emulate MMX mmx_pmovmskb with SSE by zero-extending the result of SSE pmovmskb
from QImode to SImode.  Only an SSE register source operand is allowed.

PR target/89021
* config/i386/mmx.md (mmx_pmovmskb): Changed to
define_insn_and_split to support SSE emulation.
---
 gcc/config/i386/mmx.md | 30 +++---
 1 file changed, 23 insertions(+), 7 deletions(-)

diff --git a/gcc/config/i386/mmx.md b/gcc/config/i386/mmx.md
index edfb8623701..5ae04de205d 100644
--- a/gcc/config/i386/mmx.md
+++ b/gcc/config/i386/mmx.md
@@ -1763,14 +1763,30 @@
   [(set_attr "type" "mmxshft")
(set_attr "mode" "DI")])
 
-(define_insn "mmx_pmovmskb"
-  [(set (match_operand:SI 0 "register_operand" "=r")
-   (unspec:SI [(match_operand:V8QI 1 "register_operand" "y")]
+(define_insn_and_split "mmx_pmovmskb"
+  [(set (match_operand:SI 0 "register_operand" "=r,r")
+   (unspec:SI [(match_operand:V8QI 1 "register_operand" "y,x")]
   UNSPEC_MOVMSK))]
-  "TARGET_SSE || TARGET_3DNOW_A"
-  "pmovmskb\t{%1, %0|%0, %1}"
-  [(set_attr "type" "mmxcvt")
-   (set_attr "mode" "DI")])
+  "(TARGET_MMX || TARGET_MMX_WITH_SSE)
+   && (TARGET_SSE || TARGET_3DNOW_A)"
+  "@
+   pmovmskb\t{%1, %0|%0, %1}
+   #"
+  "TARGET_MMX_WITH_SSE && reload_completed"
+  [(set (match_dup 0)
+(unspec:SI [(match_dup 1)] UNSPEC_MOVMSK))
+   (set (match_dup 0)
+   (zero_extend:SI (match_dup 2)))]
+{
+  /* Generate SSE pmovmskb and zero-extend from QImode to SImode.  */
+  operands[1] = lowpart_subreg (V16QImode, operands[1],
+   GET_MODE (operands[1]));
+  operands[2] = lowpart_subreg (QImode, operands[0],
+   GET_MODE (operands[0]));
+}
+  [(set_attr "mmx_isa" "native,x64")
+   (set_attr "type" "mmxcvt,ssemov")
+   (set_attr "mode" "DI,TI")])
 
 (define_expand "mmx_maskmovq"
   [(set (match_operand:V8QI 0 "memory_operand")
-- 
2.20.1



[PATCH 25/41] i386: Emulate MMX movntq with SSE2 movntidi

2019-02-16 Thread H.J. Lu
Emulate MMX movntq with SSE2 movntidi.  Only register source operand is
allowed.

PR target/89021
* config/i386/mmx.md (sse_movntq): Add SSE2 emulation.
---
 gcc/config/i386/mmx.md | 14 +-
 1 file changed, 9 insertions(+), 5 deletions(-)

diff --git a/gcc/config/i386/mmx.md b/gcc/config/i386/mmx.md
index 098e41e19c3..b06f0af984a 100644
--- a/gcc/config/i386/mmx.md
+++ b/gcc/config/i386/mmx.md
@@ -214,12 +214,16 @@
 })
 
 (define_insn "sse_movntq"
-  [(set (match_operand:DI 0 "memory_operand" "=m")
-   (unspec:DI [(match_operand:DI 1 "register_operand" "y")]
+  [(set (match_operand:DI 0 "memory_operand" "=m,m")
+   (unspec:DI [(match_operand:DI 1 "register_operand" "y,r")]
   UNSPEC_MOVNTQ))]
-  "TARGET_SSE || TARGET_3DNOW_A"
-  "movntq\t{%1, %0|%0, %1}"
-  [(set_attr "type" "mmxmov")
+  "(TARGET_MMX || TARGET_MMX_WITH_SSE)
+   && (TARGET_SSE || TARGET_3DNOW_A)"
+  "@
+   movntq\t{%1, %0|%0, %1}
+   movnti\t{%1, %0|%0, %1}"
+  [(set_attr "mmx_isa" "native,x64")
+   (set_attr "type" "mmxmov,ssemov")
(set_attr "mode" "DI")])
 
 ;
-- 
2.20.1



[PATCH 28/41] i386: Emulate MMX ssse3_phwv4hi3 with SSE

2019-02-16 Thread H.J. Lu
Emulate MMX ssse3_phwv4hi3 with SSE by moving bits
64:95 to bits 32:63 in the SSE register.  Only an SSE register source operand
is allowed.

PR target/89021
* config/i386/sse.md (ssse3_phwv4hi3):
Changed to define_insn_and_split to support SSE emulation.
---
 gcc/config/i386/sse.md | 34 ++
 1 file changed, 26 insertions(+), 8 deletions(-)

diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md
index 3135ce4eace..5f29f2c3595 100644
--- a/gcc/config/i386/sse.md
+++ b/gcc/config/i386/sse.md
@@ -15243,13 +15243,13 @@
(set_attr "prefix" "orig,vex")
(set_attr "mode" "TI")])
 
-(define_insn "ssse3_phwv4hi3"
-  [(set (match_operand:V4HI 0 "register_operand" "=y")
+(define_insn_and_split "ssse3_phwv4hi3"
+  [(set (match_operand:V4HI 0 "register_operand" "=y,x,Yv")
(vec_concat:V4HI
  (vec_concat:V2HI
(ssse3_plusminus:HI
  (vec_select:HI
-   (match_operand:V4HI 1 "register_operand" "0")
+   (match_operand:V4HI 1 "register_operand" "0,0,Yv")
(parallel [(const_int 0)]))
  (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
(ssse3_plusminus:HI
@@ -15258,19 +15258,37 @@
  (vec_concat:V2HI
(ssse3_plusminus:HI
  (vec_select:HI
-   (match_operand:V4HI 2 "nonimmediate_operand" "ym")
+   (match_operand:V4HI 2 "register_mmxmem_operand" "ym,x,Yv")
(parallel [(const_int 0)]))
  (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
(ssse3_plusminus:HI
  (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
  (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))]
-  "TARGET_SSSE3"
-  "phw\t{%2, %0|%0, %2}"
-  [(set_attr "type" "sseiadd")
+  "(TARGET_MMX || TARGET_MMX_WITH_SSE) && TARGET_SSSE3"
+  "@
+   phw\t{%2, %0|%0, %2}
+   #
+   #"
+  "TARGET_MMX_WITH_SSE && reload_completed"
+  [(const_int 0)]
+{
+  /* Generate SSE version of the operation.  */
+  rtx op0 = lowpart_subreg (V8HImode, operands[0],
+   GET_MODE (operands[0]));
+  rtx op1 = lowpart_subreg (V8HImode, operands[1],
+   GET_MODE (operands[1]));
+  rtx op2 = lowpart_subreg (V8HImode, operands[2],
+   GET_MODE (operands[2]));
+  emit_insn (gen_ssse3_phwv8hi3 (op0, op1, op2));
+  ix86_move_vector_high_sse_to_mmx (op0);
+  DONE;
+}
+  [(set_attr "mmx_isa" "native,x64_noavx,x64_avx")
+   (set_attr "type" "sseiadd")
(set_attr "atom_unit" "complex")
(set_attr "prefix_extra" "1")
(set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
-   (set_attr "mode" "DI")])
+   (set_attr "mode" "DI,TI,TI")])
 
 (define_insn "avx2_phdv8si3"
   [(set (match_operand:V8SI 0 "register_operand" "=x")
-- 
2.20.1



[PATCH 35/41] i386: Emulate MMX abs2 with SSE

2019-02-16 Thread H.J. Lu
Emulate MMX abs2 with SSE.  Only an SSE register source operand is
allowed.

PR target/89021
* config/i386/sse.md (abs2): Add SSE emulation.
---
 gcc/config/i386/sse.md | 15 +--
 1 file changed, 9 insertions(+), 6 deletions(-)

diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md
index b69a467291c..97ec3795b82 100644
--- a/gcc/config/i386/sse.md
+++ b/gcc/config/i386/sse.md
@@ -15973,16 +15973,19 @@
 })
 
 (define_insn "abs2"
-  [(set (match_operand:MMXMODEI 0 "register_operand" "=y")
+  [(set (match_operand:MMXMODEI 0 "register_operand" "=y,Yv")
(abs:MMXMODEI
- (match_operand:MMXMODEI 1 "nonimmediate_operand" "ym")))]
-  "TARGET_SSSE3"
-  "pabs\t{%1, %0|%0, %1}";
-  [(set_attr "type" "sselog1")
+ (match_operand:MMXMODEI 1 "register_mmxmem_operand" "ym,Yv")))]
+  "(TARGET_MMX || TARGET_MMX_WITH_SSE) && TARGET_SSSE3"
+  "@
+   pabs\t{%1, %0|%0, %1}
+   %vpabs\t{%1, %0|%0, %1}"
+  [(set_attr "mmx_isa" "native,x64")
+   (set_attr "type" "sselog1")
(set_attr "prefix_rep" "0")
(set_attr "prefix_extra" "1")
(set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
-   (set_attr "mode" "DI")])
+   (set_attr "mode" "DI,TI")])
 
 ;
 ;;
-- 
2.20.1



[PATCH 29/41] i386: Emulate MMX ssse3_phdv2si3 with SSE

2019-02-16 Thread H.J. Lu
Emulate MMX ssse3_phdv2si3 with SSE by moving bits
64:95 to bits 32:63 in the SSE register.  Only an SSE register source operand
is allowed.

PR target/89021
* config/i386/sse.md (ssse3_phdv2si3):
Changed to define_insn_and_split to support SSE emulation.
---
 gcc/config/i386/sse.md | 34 ++
 1 file changed, 26 insertions(+), 8 deletions(-)

diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md
index 5f29f2c3595..551a1cb1eb2 100644
--- a/gcc/config/i386/sse.md
+++ b/gcc/config/i386/sse.md
@@ -15367,26 +15367,44 @@
(set_attr "prefix" "orig,vex")
(set_attr "mode" "TI")])
 
-(define_insn "ssse3_phdv2si3"
-  [(set (match_operand:V2SI 0 "register_operand" "=y")
+(define_insn_and_split "ssse3_phdv2si3"
+  [(set (match_operand:V2SI 0 "register_operand" "=y,x,Yv")
(vec_concat:V2SI
  (plusminus:SI
(vec_select:SI
- (match_operand:V2SI 1 "register_operand" "0")
+ (match_operand:V2SI 1 "register_operand" "0,0,Yv")
  (parallel [(const_int 0)]))
(vec_select:SI (match_dup 1) (parallel [(const_int 1)])))
  (plusminus:SI
(vec_select:SI
- (match_operand:V2SI 2 "nonimmediate_operand" "ym")
+ (match_operand:V2SI 2 "register_mmxmem_operand" "ym,x,Yv")
  (parallel [(const_int 0)]))
(vec_select:SI (match_dup 2) (parallel [(const_int 1)])]
-  "TARGET_SSSE3"
-  "phd\t{%2, %0|%0, %2}"
-  [(set_attr "type" "sseiadd")
+  "(TARGET_MMX || TARGET_MMX_WITH_SSE) && TARGET_SSSE3"
+  "@
+   phd\t{%2, %0|%0, %2}
+   #
+   #"
+  "TARGET_MMX_WITH_SSE && reload_completed"
+  [(const_int 0)]
+{
+  /* Generate SSE version of the operation.  */
+  rtx op0 = lowpart_subreg (V4SImode, operands[0],
+   GET_MODE (operands[0]));
+  rtx op1 = lowpart_subreg (V4SImode, operands[1],
+   GET_MODE (operands[1]));
+  rtx op2 = lowpart_subreg (V4SImode, operands[2],
+   GET_MODE (operands[2]));
+  emit_insn (gen_ssse3_phdv4si3 (op0, op1, op2));
+  ix86_move_vector_high_sse_to_mmx (op0);
+  DONE;
+}
+  [(set_attr "mmx_isa" "native,x64_noavx,x64_avx")
+   (set_attr "type" "sseiadd")
(set_attr "atom_unit" "complex")
(set_attr "prefix_extra" "1")
(set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
-   (set_attr "mode" "DI")])
+   (set_attr "mode" "DI,TI,TI")])
 
 (define_insn "avx2_pmaddubsw256"
   [(set (match_operand:V16HI 0 "register_operand" "=x,v")
-- 
2.20.1



[PATCH 38/41] i386: Allow MMX vector expanders with TARGET_MMX_WITH_SSE

2019-02-16 Thread H.J. Lu
PR target/89021
* config/i386/i386.c (ix86_expand_vector_init_duplicate): Set
mmx_ok to true if TARGET_MMX_WITH_SSE is true.
(ix86_expand_vector_init_one_nonzero): Likewise.
(ix86_expand_vector_init_one_var): Likewise.
(ix86_expand_vector_init_general): Likewise.
(ix86_expand_vector_init): Likewise.
(ix86_expand_vector_set): Likewise.
(ix86_expand_vector_extract): Likewise.
* config/i386/mmx.md (*vec_dupv2sf): Changed to
define_insn_and_split to support SSE emulation.
(*vec_extractv2sf_0): Likewise.
(*vec_extractv2sf_1): Likewise.
(*vec_extractv2si_0): Likewise.
(*vec_extractv2si_1): Likewise.
(*vec_extractv2si_zext_mem): Likewise.
(vec_setv2sf): Also allow TARGET_MMX_WITH_SSE.
(vec_extractv2sf_1 splitter): Likewise.
(vec_extractv2sfsf): Likewise.
(vec_setv2si): Likewise.
(vec_extractv2si_1 splitter): Likewise.
(vec_extractv2sisi): Likewise.
(vec_setv4hi): Likewise.
(vec_extractv4hihi): Likewise.
(vec_setv8qi): Likewise.
(vec_extractv8qiqi): Likewise.
---
 gcc/config/i386/i386.c |  8 +
 gcc/config/i386/mmx.md | 66 +++---
 2 files changed, 50 insertions(+), 24 deletions(-)

diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c
index a76c17beece..25e0dc43a9e 100644
--- a/gcc/config/i386/i386.c
+++ b/gcc/config/i386/i386.c
@@ -42620,6 +42620,7 @@ ix86_expand_vector_init_duplicate (bool mmx_ok, 
machine_mode mode,
 {
   bool ok;
 
+  mmx_ok |= TARGET_MMX_WITH_SSE;
   switch (mode)
 {
 case E_V2SImode:
@@ -42779,6 +42780,7 @@ ix86_expand_vector_init_one_nonzero (bool mmx_ok, 
machine_mode mode,
   bool use_vector_set = false;
   rtx (*gen_vec_set_0) (rtx, rtx, rtx) = NULL;
 
+  mmx_ok |= TARGET_MMX_WITH_SSE;
   switch (mode)
 {
 case E_V2DImode:
@@ -42972,6 +42974,7 @@ ix86_expand_vector_init_one_var (bool mmx_ok, 
machine_mode mode,
   XVECEXP (const_vec, 0, one_var) = CONST0_RTX (GET_MODE_INNER (mode));
   const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (const_vec, 0));
 
+  mmx_ok |= TARGET_MMX_WITH_SSE;
   switch (mode)
 {
 case E_V2DFmode:
@@ -43357,6 +43360,7 @@ ix86_expand_vector_init_general (bool mmx_ok, 
machine_mode mode,
   machine_mode quarter_mode = VOIDmode;
   int n, i;
 
+  mmx_ok |= TARGET_MMX_WITH_SSE;
   switch (mode)
 {
 case E_V2SFmode:
@@ -43556,6 +43560,8 @@ ix86_expand_vector_init (bool mmx_ok, rtx target, rtx 
vals)
   int i;
   rtx x;
 
+  mmx_ok |= TARGET_MMX_WITH_SSE;
+
   /* Handle first initialization from vector elts.  */
   if (n_elts != XVECLEN (vals, 0))
 {
@@ -43655,6 +43661,7 @@ ix86_expand_vector_set (bool mmx_ok, rtx target, rtx 
val, int elt)
   machine_mode mmode = VOIDmode;
   rtx (*gen_blendm) (rtx, rtx, rtx, rtx);
 
+  mmx_ok |= TARGET_MMX_WITH_SSE;
   switch (mode)
 {
 case E_V2SFmode:
@@ -44010,6 +44017,7 @@ ix86_expand_vector_extract (bool mmx_ok, rtx target, 
rtx vec, int elt)
   bool use_vec_extr = false;
   rtx tmp;
 
+  mmx_ok |= TARGET_MMX_WITH_SSE;
   switch (mode)
 {
 case E_V2SImode:
diff --git a/gcc/config/i386/mmx.md b/gcc/config/i386/mmx.md
index c612d6b9e5c..f98952fd8a0 100644
--- a/gcc/config/i386/mmx.md
+++ b/gcc/config/i386/mmx.md
@@ -555,14 +555,23 @@
(set_attr "prefix_extra" "1")
(set_attr "mode" "V2SF")])
 
-(define_insn "*vec_dupv2sf"
-  [(set (match_operand:V2SF 0 "register_operand" "=y")
+(define_insn_and_split "*vec_dupv2sf"
+  [(set (match_operand:V2SF 0 "register_operand" "=y,x,Yv")
(vec_duplicate:V2SF
- (match_operand:SF 1 "register_operand" "0")))]
-  "TARGET_MMX"
-  "punpckldq\t%0, %0"
-  [(set_attr "type" "mmxcvt")
-   (set_attr "mode" "DI")])
+ (match_operand:SF 1 "register_operand" "0,0,Yv")))]
+  "TARGET_MMX || TARGET_MMX_WITH_SSE"
+  "@
+   punpckldq\t%0, %0
+   #
+   #"
+  "TARGET_MMX_WITH_SSE && reload_completed"
+  [(set (match_dup 0)
+   (vec_duplicate:V4SF (match_dup 1)))]
+  "operands[0] = lowpart_subreg (V4SFmode, operands[0],
+GET_MODE (operands[0]));"
+  [(set_attr "mmx_isa" "native,x64_noavx,x64_avx")
+   (set_attr "type" "mmxcvt,ssemov,ssemov")
+   (set_attr "mode" "DI,TI,TI")])
 
 (define_insn "*mmx_concatv2sf"
   [(set (match_operand:V2SF 0 "register_operand" "=y,y")
@@ -580,7 +589,7 @@
   [(match_operand:V2SF 0 "register_operand")
(match_operand:SF 1 "register_operand")
(match_operand 2 "const_int_operand")]
-  "TARGET_MMX"
+  "TARGET_MMX || TARGET_MMX_WITH_SSE"
 {
   ix86_expand_vector_set (false, operands[0], operands[1],
  INTVAL (operands[2]));
@@ -594,11 +603,13 @@
(vec_select:SF
  (match_operand:V2SF 1 "nonimmediate_operand" " xm,x,ym,y,m,m")
  (parallel [(const_int 0)])))]
-  "TARGET_MMX && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
+  "(TARGET_MMX || TARGET_MMX_WITH_SSE)
+   && !(MEM_

[PATCH 30/41] i386: Emulate MMX ssse3_pmaddubsw with SSE

2019-02-16 Thread H.J. Lu
Emulate MMX ssse3_pmaddubsw with SSE.  Only an SSE register source operand
is allowed.

PR target/89021
* config/i386/sse.md (ssse3_pmaddubsw): Add SSE emulation.
---
 gcc/config/i386/sse.md | 18 +++---
 1 file changed, 11 insertions(+), 7 deletions(-)

diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md
index 551a1cb1eb2..e8d9bec9766 100644
--- a/gcc/config/i386/sse.md
+++ b/gcc/config/i386/sse.md
@@ -1,17 +1,17 @@
(set_attr "mode" "TI")])
 
 (define_insn "ssse3_pmaddubsw"
-  [(set (match_operand:V4HI 0 "register_operand" "=y")
+  [(set (match_operand:V4HI 0 "register_operand" "=y,x,Yv")
(ss_plus:V4HI
  (mult:V4HI
(zero_extend:V4HI
  (vec_select:V4QI
-   (match_operand:V8QI 1 "register_operand" "0")
+   (match_operand:V8QI 1 "register_operand" "0,0,Yv")
(parallel [(const_int 0) (const_int 2)
   (const_int 4) (const_int 6)])))
(sign_extend:V4HI
  (vec_select:V4QI
-   (match_operand:V8QI 2 "nonimmediate_operand" "ym")
+   (match_operand:V8QI 2 "register_mmxmem_operand" "ym,x,Yv")
(parallel [(const_int 0) (const_int 2)
   (const_int 4) (const_int 6)]
  (mult:V4HI
@@ -15577,13 +15577,17 @@
  (vec_select:V4QI (match_dup 2)
(parallel [(const_int 1) (const_int 3)
   (const_int 5) (const_int 7)]))]
-  "TARGET_SSSE3"
-  "pmaddubsw\t{%2, %0|%0, %2}"
-  [(set_attr "type" "sseiadd")
+  "(TARGET_MMX || TARGET_MMX_WITH_SSE) && TARGET_SSSE3"
+  "@
+   pmaddubsw\t{%2, %0|%0, %2}
+   pmaddubsw\t{%2, %0|%0, %2}
+   vpmaddubsw\t{%2, %1, %0|%0, %1, %2}"
+  [(set_attr "mmx_isa" "native,x64_noavx,x64_avx")
+   (set_attr "type" "sseiadd")
(set_attr "atom_unit" "simul")
(set_attr "prefix_extra" "1")
(set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
-   (set_attr "mode" "DI")])
+   (set_attr "mode" "DI,TI,TI")])
 
 (define_mode_iterator PMULHRSW
   [V8HI (V16HI "TARGET_AVX2")])
-- 
2.20.1



[PATCH 26/41] i386: Emulate MMX umulv1siv1di3 with SSE2

2019-02-16 Thread H.J. Lu
Emulate MMX umulv1siv1di3 with SSE2.  Only an SSE register source operand
is allowed.

PR target/89021
* config/i386/mmx.md (sse2_umulv1siv1di3): Add SSE emulation
support.
(*sse2_umulv1siv1di3): Add SSE2 emulation.
---
 gcc/config/i386/mmx.md | 26 --
 1 file changed, 16 insertions(+), 10 deletions(-)

diff --git a/gcc/config/i386/mmx.md b/gcc/config/i386/mmx.md
index b06f0af984a..f27513f7f2c 100644
--- a/gcc/config/i386/mmx.md
+++ b/gcc/config/i386/mmx.md
@@ -905,30 +905,36 @@
 (mult:V1DI
  (zero_extend:V1DI
(vec_select:V1SI
- (match_operand:V2SI 1 "nonimmediate_operand")
+ (match_operand:V2SI 1 "register_mmxmem_operand")
  (parallel [(const_int 0)])))
  (zero_extend:V1DI
(vec_select:V1SI
- (match_operand:V2SI 2 "nonimmediate_operand")
+ (match_operand:V2SI 2 "register_mmxmem_operand")
  (parallel [(const_int 0)])]
-  "TARGET_SSE2"
+  "(TARGET_MMX || TARGET_MMX_WITH_SSE) && TARGET_SSE2"
   "ix86_fixup_binary_operands_no_copy (MULT, V2SImode, operands);")
 
 (define_insn "*sse2_umulv1siv1di3"
-  [(set (match_operand:V1DI 0 "register_operand" "=y")
+  [(set (match_operand:V1DI 0 "register_operand" "=y,x,Yv")
 (mult:V1DI
  (zero_extend:V1DI
(vec_select:V1SI
- (match_operand:V2SI 1 "nonimmediate_operand" "%0")
+ (match_operand:V2SI 1 "register_mmxmem_operand" "%0,0,Yv")
  (parallel [(const_int 0)])))
  (zero_extend:V1DI
(vec_select:V1SI
- (match_operand:V2SI 2 "nonimmediate_operand" "ym")
+ (match_operand:V2SI 2 "register_mmxmem_operand" "ym,x,Yv")
  (parallel [(const_int 0)])]
-  "TARGET_SSE2 && ix86_binary_operator_ok (MULT, V2SImode, operands)"
-  "pmuludq\t{%2, %0|%0, %2}"
-  [(set_attr "type" "mmxmul")
-   (set_attr "mode" "DI")])
+  "(TARGET_MMX || TARGET_MMX_WITH_SSE)
+   && TARGET_SSE2
+   && ix86_binary_operator_ok (MULT, V2SImode, operands)"
+  "@
+   pmuludq\t{%2, %0|%0, %2}
+   pmuludq\t{%2, %0|%0, %2}
+   vpmuludq\t{%2, %1, %0|%0, %1, %2}"
+  [(set_attr "mmx_isa" "native,x64_noavx,x64_avx")
+   (set_attr "type" "mmxmul,ssemul,ssemul")
+   (set_attr "mode" "DI,TI,TI")])
 
 (define_expand "mmx_v4hi3"
   [(set (match_operand:V4HI 0 "register_operand")
-- 
2.20.1



[PATCH 36/41] Prevent allocation of MMX registers with TARGET_MMX_WITH_SSE

2019-02-16 Thread H.J. Lu
From: Uros Bizjak 

2019-02-14  Uroš Bizjak  

PR target/89021
* config/i386/i386.md (*zero_extendsidi2): Add mmx_isa attribute.
* config/i386/sse.md (*vec_concatv2sf_sse4_1): Ditto.
(*vec_concatv2sf_sse): Ditto.
(*vec_concatv2si_sse4_1): Ditto.
(*vec_concatv2si): Ditto.
(*vec_concatv4si_0): Ditto.
(*vec_concatv2di_0): Ditto.
---
 gcc/config/i386/i386.md |  4 
 gcc/config/i386/sse.md  | 16 ++--
 2 files changed, 18 insertions(+), 2 deletions(-)

diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md
index 04ec0eeaa57..4cbbd4cf685 100644
--- a/gcc/config/i386/i386.md
+++ b/gcc/config/i386/i386.md
@@ -3683,6 +3683,10 @@
  (const_string "avx512bw")
   ]
   (const_string "*")))
+   (set (attr "mmx_isa")
+ (if_then_else (eq_attr "alternative" "5,6")
+  (const_string "native")
+  (const_string "*")))
(set (attr "type")
  (cond [(eq_attr "alternative" "0,1,2,4")
  (const_string "multi")
diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md
index 97ec3795b82..4b415d255e0 100644
--- a/gcc/config/i386/sse.md
+++ b/gcc/config/i386/sse.md
@@ -7209,6 +7209,10 @@
  (const_string "mmxmov")
   ]
   (const_string "sselog")))
+   (set (attr "mmx_isa")
+ (if_then_else (eq_attr "alternative" "7,8")
+  (const_string "native")
+  (const_string "*")))
(set (attr "prefix_data16")
  (if_then_else (eq_attr "alternative" "3,4")
   (const_string "1")
@@ -7244,7 +7248,8 @@
movss\t{%1, %0|%0, %1}
punpckldq\t{%2, %0|%0, %2}
movd\t{%1, %0|%0, %1}"
-  [(set_attr "type" "sselog,ssemov,mmxcvt,mmxmov")
+  [(set_attr "mmx_isa" "*,*,native,native")
+   (set_attr "type" "sselog,ssemov,mmxcvt,mmxmov")
(set_attr "mode" "V4SF,SF,DI,DI")])
 
 (define_insn "*vec_concatv4sf"
@@ -14520,6 +14525,10 @@
punpckldq\t{%2, %0|%0, %2}
movd\t{%1, %0|%0, %1}"
   [(set_attr "isa" "noavx,noavx,avx,avx512dq,noavx,noavx,avx,*,*,*")
+   (set (attr "mmx_isa")
+ (if_then_else (eq_attr "alternative" "8,9")
+  (const_string "native")
+  (const_string "*")))
(set (attr "type")
  (cond [(eq_attr "alternative" "7")
  (const_string "ssemov")
@@ -14557,6 +14566,7 @@
punpckldq\t{%2, %0|%0, %2}
movd\t{%1, %0|%0, %1}"
   [(set_attr "isa" "sse2,sse2,*,*,*,*")
+   (set_attr "mmx_isa" "*,*,*,*,native,native")
(set_attr "type" "sselog,ssemov,sselog,ssemov,mmxcvt,mmxmov")
(set_attr "mode" "TI,TI,V4SF,SF,DI,DI")])
 
@@ -14586,7 +14596,8 @@
   "@
%vmovq\t{%1, %0|%0, %1}
movq2dq\t{%1, %0|%0, %1}"
-  [(set_attr "type" "ssemov")
+  [(set_attr "mmx_isa" "*,native")
+   (set_attr "type" "ssemov")
(set_attr "prefix" "maybe_vex,orig")
(set_attr "mode" "TI")])
 
@@ -14661,6 +14672,7 @@
%vmovq\t{%1, %0|%0, %1}
movq2dq\t{%1, %0|%0, %1}"
   [(set_attr "isa" "x64,*,*")
+   (set_attr "mmx_isa" "*,*,native")
(set_attr "type" "ssemov")
(set_attr "prefix_rex" "1,*,*")
(set_attr "prefix" "maybe_vex,maybe_vex,orig")
-- 
2.20.1



[PATCH 39/41] i386: Allow MMX intrinsic emulation with SSE

2019-02-16 Thread H.J. Lu
Allow MMX intrinsic emulation with SSE/SSE2/SSSE3.  Don't enable MMX ISA
by default with TARGET_MMX_WITH_SSE.

For pr82483-1.c and pr82483-2.c, "-mssse3 -mno-mmx" compiles in 64-bit
mode since MMX intrinsics can be emulated with SSE.

gcc/

PR target/89021
* config/i386/i386-builtin.def: Enable MMX intrinsics with
SSE/SSE2/SSSE3.
* config/i386/i386.c (ix86_init_mmx_sse_builtins): Likewise.
(ix86_expand_builtin): Allow SSE/SSE2/SSSE3 to emulate MMX
intrinsics with TARGET_MMX_WITH_SSE.
* config/i386/mmintrin.h: Only require SSE2 if __MMX_WITH_SSE__
is defined.

gcc/testsuite/

PR target/89021
* gcc.target/i386/pr82483-1.c: Error only on ia32.
* gcc.target/i386/pr82483-2.c: Likewise.
---
 gcc/config/i386/i386-builtin.def  | 126 +++---
 gcc/config/i386/i386.c|  29 -
 gcc/config/i386/mmintrin.h|  12 ++-
 gcc/testsuite/gcc.target/i386/pr82483-1.c |   2 +-
 gcc/testsuite/gcc.target/i386/pr82483-2.c |   2 +-
 5 files changed, 101 insertions(+), 70 deletions(-)

diff --git a/gcc/config/i386/i386-builtin.def b/gcc/config/i386/i386-builtin.def
index 88005f4687f..10a9d631f29 100644
--- a/gcc/config/i386/i386-builtin.def
+++ b/gcc/config/i386/i386-builtin.def
@@ -100,7 +100,7 @@ BDESC (0, 0, CODE_FOR_fnstsw, "__builtin_ia32_fnstsw", 
IX86_BUILTIN_FNSTSW, UNKN
 BDESC (0, 0, CODE_FOR_fnclex, "__builtin_ia32_fnclex", IX86_BUILTIN_FNCLEX, 
UNKNOWN, (int) VOID_FTYPE_VOID)
 
 /* MMX */
-BDESC (OPTION_MASK_ISA_MMX, 0, CODE_FOR_mmx_emms, "__builtin_ia32_emms", 
IX86_BUILTIN_EMMS, UNKNOWN, (int) VOID_FTYPE_VOID)
+BDESC (OPTION_MASK_ISA_MMX | OPTION_MASK_ISA_SSE2, 0, CODE_FOR_mmx_emms, 
"__builtin_ia32_emms", IX86_BUILTIN_EMMS, UNKNOWN, (int) VOID_FTYPE_VOID)
 
 /* 3DNow! */
 BDESC (OPTION_MASK_ISA_3DNOW, 0, CODE_FOR_mmx_femms, "__builtin_ia32_femms", 
IX86_BUILTIN_FEMMS, UNKNOWN, (int) VOID_FTYPE_VOID)
@@ -442,68 +442,68 @@ BDESC (0, 0, CODE_FOR_rotrqi3, "__builtin_ia32_rorqi", 
IX86_BUILTIN_RORQI, UNKNO
 BDESC (0, 0, CODE_FOR_rotrhi3, "__builtin_ia32_rorhi", IX86_BUILTIN_RORHI, 
UNKNOWN, (int) UINT16_FTYPE_UINT16_INT)
 
 /* MMX */
-BDESC (OPTION_MASK_ISA_MMX, 0, CODE_FOR_mmx_addv8qi3, "__builtin_ia32_paddb", 
IX86_BUILTIN_PADDB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI)
-BDESC (OPTION_MASK_ISA_MMX, 0, CODE_FOR_mmx_addv4hi3, "__builtin_ia32_paddw", 
IX86_BUILTIN_PADDW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI)
-BDESC (OPTION_MASK_ISA_MMX, 0, CODE_FOR_mmx_addv2si3, "__builtin_ia32_paddd", 
IX86_BUILTIN_PADDD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI)
-BDESC (OPTION_MASK_ISA_MMX, 0, CODE_FOR_mmx_subv8qi3, "__builtin_ia32_psubb", 
IX86_BUILTIN_PSUBB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI)
-BDESC (OPTION_MASK_ISA_MMX, 0, CODE_FOR_mmx_subv4hi3, "__builtin_ia32_psubw", 
IX86_BUILTIN_PSUBW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI)
-BDESC (OPTION_MASK_ISA_MMX, 0, CODE_FOR_mmx_subv2si3, "__builtin_ia32_psubd", 
IX86_BUILTIN_PSUBD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI)
-
-BDESC (OPTION_MASK_ISA_MMX, 0, CODE_FOR_mmx_ssaddv8qi3, 
"__builtin_ia32_paddsb", IX86_BUILTIN_PADDSB, UNKNOWN, (int) 
V8QI_FTYPE_V8QI_V8QI)
-BDESC (OPTION_MASK_ISA_MMX, 0, CODE_FOR_mmx_ssaddv4hi3, 
"__builtin_ia32_paddsw", IX86_BUILTIN_PADDSW, UNKNOWN, (int) 
V4HI_FTYPE_V4HI_V4HI)
-BDESC (OPTION_MASK_ISA_MMX, 0, CODE_FOR_mmx_sssubv8qi3, 
"__builtin_ia32_psubsb", IX86_BUILTIN_PSUBSB, UNKNOWN, (int) 
V8QI_FTYPE_V8QI_V8QI)
-BDESC (OPTION_MASK_ISA_MMX, 0, CODE_FOR_mmx_sssubv4hi3, 
"__builtin_ia32_psubsw", IX86_BUILTIN_PSUBSW, UNKNOWN, (int) 
V4HI_FTYPE_V4HI_V4HI)
-BDESC (OPTION_MASK_ISA_MMX, 0, CODE_FOR_mmx_usaddv8qi3, 
"__builtin_ia32_paddusb", IX86_BUILTIN_PADDUSB, UNKNOWN, (int) 
V8QI_FTYPE_V8QI_V8QI)
-BDESC (OPTION_MASK_ISA_MMX, 0, CODE_FOR_mmx_usaddv4hi3, 
"__builtin_ia32_paddusw", IX86_BUILTIN_PADDUSW, UNKNOWN, (int) 
V4HI_FTYPE_V4HI_V4HI)
-BDESC (OPTION_MASK_ISA_MMX, 0, CODE_FOR_mmx_ussubv8qi3, 
"__builtin_ia32_psubusb", IX86_BUILTIN_PSUBUSB, UNKNOWN, (int) 
V8QI_FTYPE_V8QI_V8QI)
-BDESC (OPTION_MASK_ISA_MMX, 0, CODE_FOR_mmx_ussubv4hi3, 
"__builtin_ia32_psubusw", IX86_BUILTIN_PSUBUSW, UNKNOWN, (int) 
V4HI_FTYPE_V4HI_V4HI)
-
-BDESC (OPTION_MASK_ISA_MMX, 0, CODE_FOR_mmx_mulv4hi3, "__builtin_ia32_pmullw", 
IX86_BUILTIN_PMULLW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI)
-BDESC (OPTION_MASK_ISA_MMX, 0, CODE_FOR_mmx_smulv4hi3_highpart, 
"__builtin_ia32_pmulhw", IX86_BUILTIN_PMULHW, UNKNOWN, (int) 
V4HI_FTYPE_V4HI_V4HI)
-
-BDESC (OPTION_MASK_ISA_MMX, 0, CODE_FOR_mmx_andv2si3, "__builtin_ia32_pand", 
IX86_BUILTIN_PAND, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI)
-BDESC (OPTION_MASK_ISA_MMX, 0, CODE_FOR_mmx_andnotv2si3, 
"__builtin_ia32_pandn", IX86_BUILTIN_PANDN, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI)
-BDESC (OPTION_MASK_ISA_MMX, 0, CODE_FOR_mmx_iorv2si3, "__builtin_ia32_por", 
IX86_BUILTIN_POR, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI)
-BDESC (OPTION_MASK_ISA_MMX, 0, CODE_FOR_mmx_xorv2si3, "__builtin_ia32_pxor", 
IX86_BUILTIN_PXOR, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI

[PATCH 18/41] i386: Emulate MMX V4HI smaxmin/V8QI umaxmin with SSE

2019-02-16 Thread H.J. Lu
Emulate MMX V4HI smaxmin/V8QI umaxmin with SSE.  Only SSE register source
operand is allowed.

PR target/89021
* config/i386/mmx.md (mmx_v4hi3): Also check TARGET_MMX
and TARGET_MMX_WITH_SSE.
(mmx_v8qi3): Likewise.
(smaxmin:v4hi3): New.
(umaxmin:v8qi3): Likewise.
(smaxmin:*mmx_v4hi3): Add SSE emulation.
(umaxmin:*mmx_v8qi3): Likewise.
---
 gcc/config/i386/mmx.md | 68 +-
 1 file changed, 48 insertions(+), 20 deletions(-)

diff --git a/gcc/config/i386/mmx.md b/gcc/config/i386/mmx.md
index dea2be1d8e2..edfb8623701 100644
--- a/gcc/config/i386/mmx.md
+++ b/gcc/config/i386/mmx.md
@@ -923,40 +923,68 @@
 (define_expand "mmx_v4hi3"
   [(set (match_operand:V4HI 0 "register_operand")
 (smaxmin:V4HI
- (match_operand:V4HI 1 "nonimmediate_operand")
- (match_operand:V4HI 2 "nonimmediate_operand")))]
-  "TARGET_SSE || TARGET_3DNOW_A"
+ (match_operand:V4HI 1 "register_mmxmem_operand")
+ (match_operand:V4HI 2 "register_mmxmem_operand")))]
+  "(TARGET_MMX || TARGET_MMX_WITH_SSE)
+   && (TARGET_SSE || TARGET_3DNOW_A)"
+  "ix86_fixup_binary_operands_no_copy (, V4HImode, operands);")
+
+(define_expand "v4hi3"
+  [(set (match_operand:V4HI 0 "register_operand")
+(smaxmin:V4HI
+ (match_operand:V4HI 1 "register_operand")
+ (match_operand:V4HI 2 "register_operand")))]
+  "TARGET_MMX_WITH_SSE"
   "ix86_fixup_binary_operands_no_copy (, V4HImode, operands);")
 
 (define_insn "*mmx_v4hi3"
-  [(set (match_operand:V4HI 0 "register_operand" "=y")
+  [(set (match_operand:V4HI 0 "register_operand" "=y,x,Yv")
 (smaxmin:V4HI
- (match_operand:V4HI 1 "nonimmediate_operand" "%0")
- (match_operand:V4HI 2 "nonimmediate_operand" "ym")))]
-  "(TARGET_SSE || TARGET_3DNOW_A)
+ (match_operand:V4HI 1 "register_mmxmem_operand" "%0,0,Yv")
+ (match_operand:V4HI 2 "register_mmxmem_operand" "ym,x,Yv")))]
+  "(TARGET_MMX || TARGET_MMX_WITH_SSE)
+   && (TARGET_SSE || TARGET_3DNOW_A)
&& ix86_binary_operator_ok (, V4HImode, operands)"
-  "pw\t{%2, %0|%0, %2}"
-  [(set_attr "type" "mmxadd")
-   (set_attr "mode" "DI")])
+  "@
+   pw\t{%2, %0|%0, %2}
+   pw\t{%2, %0|%0, %2}
+   vpw\t{%2, %1, %0|%0, %1, %2}"
+  [(set_attr "mmx_isa" "native,x64_noavx,x64_avx")
+   (set_attr "type" "mmxadd,sseiadd,sseiadd")
+   (set_attr "mode" "DI,TI,TI")])
 
 (define_expand "mmx_v8qi3"
   [(set (match_operand:V8QI 0 "register_operand")
 (umaxmin:V8QI
- (match_operand:V8QI 1 "nonimmediate_operand")
- (match_operand:V8QI 2 "nonimmediate_operand")))]
-  "TARGET_SSE || TARGET_3DNOW_A"
+ (match_operand:V8QI 1 "register_mmxmem_operand")
+ (match_operand:V8QI 2 "register_mmxmem_operand")))]
+  "(TARGET_MMX || TARGET_MMX_WITH_SSE)
+   && (TARGET_SSE || TARGET_3DNOW_A)"
+  "ix86_fixup_binary_operands_no_copy (, V8QImode, operands);")
+
+(define_expand "v8qi3"
+  [(set (match_operand:V8QI 0 "register_operand")
+(umaxmin:V8QI
+ (match_operand:V8QI 1 "register_operand")
+ (match_operand:V8QI 2 "register_operand")))]
+  "TARGET_MMX_WITH_SSE"
   "ix86_fixup_binary_operands_no_copy (, V8QImode, operands);")
 
 (define_insn "*mmx_v8qi3"
-  [(set (match_operand:V8QI 0 "register_operand" "=y")
+  [(set (match_operand:V8QI 0 "register_operand" "=y,x,Yv")
 (umaxmin:V8QI
- (match_operand:V8QI 1 "nonimmediate_operand" "%0")
- (match_operand:V8QI 2 "nonimmediate_operand" "ym")))]
-  "(TARGET_SSE || TARGET_3DNOW_A)
+ (match_operand:V8QI 1 "register_mmxmem_operand" "%0,0,Yv")
+ (match_operand:V8QI 2 "register_mmxmem_operand" "ym,x,Yv")))]
+  "(TARGET_MMX || TARGET_MMX_WITH_SSE)
+   && (TARGET_SSE || TARGET_3DNOW_A)
&& ix86_binary_operator_ok (, V8QImode, operands)"
-  "pb\t{%2, %0|%0, %2}"
-  [(set_attr "type" "mmxadd")
-   (set_attr "mode" "DI")])
+  "@
+   pb\t{%2, %0|%0, %2}
+   pb\t{%2, %0|%0, %2}
+   vpb\t{%2, %1, %0|%0, %1, %2}"
+  [(set_attr "mmx_isa" "native,x64_noavx,x64_avx")
+   (set_attr "type" "mmxadd,sseiadd,sseiadd")
+   (set_attr "mode" "DI,TI,TI")])
 
 (define_insn "mmx_ashr3"
   [(set (match_operand:MMXMODE24 0 "register_operand" "=y,x,Yv")
-- 
2.20.1



[PATCH 24/41] i386: Emulate MMX mmx_psadbw with SSE

2019-02-16 Thread H.J. Lu
Emulate MMX mmx_psadbw with SSE.  Only SSE register source operand is
allowed.

PR target/89021
* config/i386/mmx.md (mmx_psadbw): Add SSE emulation.
---
 gcc/config/i386/mmx.md | 19 ---
 1 file changed, 12 insertions(+), 7 deletions(-)

diff --git a/gcc/config/i386/mmx.md b/gcc/config/i386/mmx.md
index d647dc28baa..098e41e19c3 100644
--- a/gcc/config/i386/mmx.md
+++ b/gcc/config/i386/mmx.md
@@ -1771,14 +1771,19 @@
(set_attr "mode" "DI,TI,TI")])
 
 (define_insn "mmx_psadbw"
-  [(set (match_operand:V1DI 0 "register_operand" "=y")
-(unspec:V1DI [(match_operand:V8QI 1 "register_operand" "0")
- (match_operand:V8QI 2 "nonimmediate_operand" "ym")]
+  [(set (match_operand:V1DI 0 "register_operand" "=y,x,Yv")
+(unspec:V1DI [(match_operand:V8QI 1 "register_operand" "0,0,Yv")
+ (match_operand:V8QI 2 "register_mmxmem_operand" 
"ym,x,Yv")]
 UNSPEC_PSADBW))]
-  "TARGET_SSE || TARGET_3DNOW_A"
-  "psadbw\t{%2, %0|%0, %2}"
-  [(set_attr "type" "mmxshft")
-   (set_attr "mode" "DI")])
+  "(TARGET_MMX || TARGET_MMX_WITH_SSE)
+   && (TARGET_SSE || TARGET_3DNOW_A)"
+  "@
+   psadbw\t{%2, %0|%0, %2}
+   psadbw\t{%2, %0|%0, %2}
+   vpsadbw\t{%2, %1, %0|%0, %1, %2}"
+  [(set_attr "mmx_isa" "native,x64_noavx,x64_avx")
+   (set_attr "type" "mmxshft,sseiadd,sseiadd")
+   (set_attr "mode" "DI,TI,TI")])
 
 (define_insn_and_split "mmx_pmovmskb"
   [(set (match_operand:SI 0 "register_operand" "=r,r")
-- 
2.20.1



[PATCH 32/41] i386: Emulate MMX pshufb with SSE version

2019-02-16 Thread H.J. Lu
Emulate MMX version of pshufb with SSE version by masking out the bit 3
of the shuffle control byte.  Only SSE register source operand is allowed.

PR target/89021
* config/i386/sse.md (ssse3_pshufbv8qi3): Changed to
define_insn_and_split.  Also allow TARGET_MMX_WITH_SSE.  Add
SSE emulation.
---
 gcc/config/i386/sse.md | 46 +-
 1 file changed, 37 insertions(+), 9 deletions(-)

diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md
index b08a577d1e4..79b35d95424 100644
--- a/gcc/config/i386/sse.md
+++ b/gcc/config/i386/sse.md
@@ -15728,17 +15728,45 @@
(set_attr "btver2_decode" "vector")
(set_attr "mode" "")])
 
-(define_insn "ssse3_pshufbv8qi3"
-  [(set (match_operand:V8QI 0 "register_operand" "=y")
-   (unspec:V8QI [(match_operand:V8QI 1 "register_operand" "0")
- (match_operand:V8QI 2 "nonimmediate_operand" "ym")]
-UNSPEC_PSHUFB))]
-  "TARGET_SSSE3"
-  "pshufb\t{%2, %0|%0, %2}";
-  [(set_attr "type" "sselog1")
+(define_insn_and_split "ssse3_pshufbv8qi3"
+  [(set (match_operand:V8QI 0 "register_operand" "=y,x,Yv")
+   (unspec:V8QI [(match_operand:V8QI 1 "register_operand" "0,0,Yv")
+ (match_operand:V8QI 2 "register_mmxmem_operand" 
"ym,x,Yv")]
+UNSPEC_PSHUFB))
+   (clobber (match_scratch:V4SI 3 "=X,x,Yv"))]
+  "(TARGET_MMX || TARGET_MMX_WITH_SSE) && TARGET_SSSE3"
+  "@
+   pshufb\t{%2, %0|%0, %2}
+   #
+   #"
+  "TARGET_MMX_WITH_SSE && reload_completed"
+  [(set (match_dup 3) (match_dup 5))
+   (set (match_dup 3)
+   (and:V4SI (match_dup 3) (match_dup 2)))
+   (set (match_dup 0)
+   (unspec:V16QI [(match_dup 1) (match_dup 4)] UNSPEC_PSHUFB))]
+{
+  /* Emulate MMX version of pshufb with SSE version by masking out the
+ bit 3 of the shuffle control byte.  */
+  operands[0] = lowpart_subreg (V16QImode, operands[0],
+   GET_MODE (operands[0]));
+  operands[1] = lowpart_subreg (V16QImode, operands[1],
+   GET_MODE (operands[1]));
+  operands[2] = lowpart_subreg (V4SImode, operands[2],
+   GET_MODE (operands[2]));
+  operands[4] = lowpart_subreg (V16QImode, operands[3],
+   GET_MODE (operands[3]));
+  rtvec par = gen_rtvec (4, GEN_INT (0xf7f7f7f7),
+GEN_INT (0xf7f7f7f7),
+GEN_INT (0xf7f7f7f7),
+GEN_INT (0xf7f7f7f7));
+  rtx vec_const = gen_rtx_CONST_VECTOR (V4SImode, par);
+  operands[5] = force_const_mem (V4SImode, vec_const);
+}
+  [(set_attr "mmx_isa" "native,x64_noavx,x64_avx")
(set_attr "prefix_extra" "1")
(set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
-   (set_attr "mode" "DI")])
+   (set_attr "mode" "DI,TI,TI")])
 
 (define_insn "_psign3"
   [(set (match_operand:VI124_AVX2 0 "register_operand" "=x,x")
-- 
2.20.1



[PATCH 34/41] i386: Emulate MMX ssse3_palignrdi with SSE

2019-02-16 Thread H.J. Lu
Emulate MMX version of palignrq with SSE version by concatenating 2
64-bit MMX operands into a single 128-bit SSE operand, followed by
SSE psrldq.  Only SSE register source operand is allowed.

PR target/89021
* config/i386/sse.md (ssse3_palignrdi): Changed to
define_insn_and_split to support SSE emulation.
---
 gcc/config/i386/sse.md | 58 ++
 1 file changed, 48 insertions(+), 10 deletions(-)

diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md
index 1d90af0a4b0..b69a467291c 100644
--- a/gcc/config/i386/sse.md
+++ b/gcc/config/i386/sse.md
@@ -15855,23 +15855,61 @@
(set_attr "prefix" "orig,vex,evex")
(set_attr "mode" "")])
 
-(define_insn "ssse3_palignrdi"
-  [(set (match_operand:DI 0 "register_operand" "=y")
-   (unspec:DI [(match_operand:DI 1 "register_operand" "0")
-   (match_operand:DI 2 "nonimmediate_operand" "ym")
-   (match_operand:SI 3 "const_0_to_255_mul_8_operand" "n")]
+(define_insn_and_split "ssse3_palignrdi"
+  [(set (match_operand:DI 0 "register_operand" "=y,x,Yv")
+   (unspec:DI [(match_operand:DI 1 "register_operand" "0,0,Yv")
+   (match_operand:DI 2 "register_mmxmem_operand" "ym,x,Yv")
+   (match_operand:SI 3 "const_0_to_255_mul_8_operand" "n,n,n")]
   UNSPEC_PALIGNR))]
-  "TARGET_SSSE3"
+  "(TARGET_MMX || TARGET_MMX_WITH_SSE) && TARGET_SSSE3"
 {
-  operands[3] = GEN_INT (INTVAL (operands[3]) / 8);
-  return "palignr\t{%3, %2, %0|%0, %2, %3}";
+  switch (which_alternative)
+{
+case 0:
+  operands[3] = GEN_INT (INTVAL (operands[3]) / 8);
+  return "palignr\t{%3, %2, %0|%0, %2, %3}";
+case 1:
+case 2:
+  return "#";
+default:
+  gcc_unreachable ();
+}
 }
-  [(set_attr "type" "sseishft")
+  "TARGET_MMX_WITH_SSE && reload_completed"
+  [(set (match_dup 0)
+   (lshiftrt:V1TI (match_dup 0) (match_dup 3)))]
+{
+  /* Emulate MMX palignrdi with SSE psrldq.  */
+  rtx op0 = lowpart_subreg (V2DImode, operands[0],
+   GET_MODE (operands[0]));
+  rtx insn;
+  if (TARGET_AVX)
+insn = gen_vec_concatv2di (op0, operands[2], operands[1]);
+  else
+{
+  /* NB: SSE can only concatenate OP0 and OP1 to OP0.  */
+  insn = gen_vec_concatv2di (op0, operands[1], operands[2]);
+  emit_insn (insn);
+  /* Swap bits 0:63 with bits 64:127.  */
+  rtx mask = gen_rtx_PARALLEL (VOIDmode,
+  gen_rtvec (4, GEN_INT (2),
+ GEN_INT (3),
+ GEN_INT (0),
+ GEN_INT (1)));
+  rtx op1 = lowpart_subreg (V4SImode, op0, GET_MODE (op0));
+  rtx op2 = gen_rtx_VEC_SELECT (V4SImode, op1, mask);
+  insn = gen_rtx_SET (op1, op2);
+}
+  emit_insn (insn);
+  operands[0] = lowpart_subreg (V1TImode, op0, GET_MODE (op0));
+}
+  [(set_attr "mmx_isa" "native,x64_noavx,x64_avx")
+   (set_attr "type" "sseishft")
(set_attr "atom_unit" "sishuf")
(set_attr "prefix_extra" "1")
(set_attr "length_immediate" "1")
(set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
-   (set_attr "mode" "DI")])
+   (set_attr "mode" "DI,TI,TI")])
 
 ;; Mode iterator to handle singularity w/ absence of V2DI and V4DI
 ;; modes for abs instruction on pre AVX-512 targets.
-- 
2.20.1



[PATCH 37/41] i386: Allow MMXMODE moves with TARGET_MMX_WITH_SSE

2019-02-16 Thread H.J. Lu
PR target/89021
* config/i386/mmx.md (MMXMODE:mov): Also allow
TARGET_MMX_WITH_SSE.
(MMXMODE:*mov_internal): Likewise.
(MMXMODE:movmisalign): Likewise.
---
 gcc/config/i386/mmx.md | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/gcc/config/i386/mmx.md b/gcc/config/i386/mmx.md
index 8b5fe3948e2..c612d6b9e5c 100644
--- a/gcc/config/i386/mmx.md
+++ b/gcc/config/i386/mmx.md
@@ -70,7 +70,7 @@
 (define_expand "mov"
   [(set (match_operand:MMXMODE 0 "nonimmediate_operand")
(match_operand:MMXMODE 1 "nonimmediate_operand"))]
-  "TARGET_MMX"
+  "TARGET_MMX || TARGET_MMX_WITH_SSE"
 {
   ix86_expand_vector_move (mode, operands);
   DONE;
@@ -81,7 +81,7 @@
 "=r ,o ,r,r ,m ,?!y,!y,?!y,m  ,r  ,?!y,v,v,v,m,r,v,!y,*x")
(match_operand:MMXMODE 1 "nonimm_or_0_operand"
 "rCo,rC,C,rm,rC,C  ,!y,m  ,?!y,?!y,r  ,C,v,m,v,v,r,*x,!y"))]
-  "TARGET_MMX
+  "(TARGET_MMX || TARGET_MMX_WITH_SSE)
&& !(MEM_P (operands[0]) && MEM_P (operands[1]))"
 {
   switch (get_attr_type (insn))
@@ -207,7 +207,7 @@
 (define_expand "movmisalign"
   [(set (match_operand:MMXMODE 0 "nonimmediate_operand")
(match_operand:MMXMODE 1 "nonimmediate_operand"))]
-  "TARGET_MMX"
+  "TARGET_MMX || TARGET_MMX_WITH_SSE"
 {
   ix86_expand_vector_move (mode, operands);
   DONE;
-- 
2.20.1



[PATCH 22/41] i386: Emulate MMX mmx_uavgv8qi3 with SSE

2019-02-16 Thread H.J. Lu
Emulate MMX mmx_uavgv8qi3 with SSE.  Only SSE register source operand is
allowed.

PR target/89021
* config/i386/mmx.md (mmx_uavgv8qi3): Also check TARGET_MMX
and TARGET_MMX_WITH_SSE.
(*mmx_uavgv8qi3): Add SSE emulation.
---
 gcc/config/i386/mmx.md | 25 +++--
 1 file changed, 15 insertions(+), 10 deletions(-)

diff --git a/gcc/config/i386/mmx.md b/gcc/config/i386/mmx.md
index 5a342256cbc..8866354dea9 100644
--- a/gcc/config/i386/mmx.md
+++ b/gcc/config/i386/mmx.md
@@ -1679,50 +1679,55 @@
(plus:V8HI
  (plus:V8HI
(zero_extend:V8HI
- (match_operand:V8QI 1 "nonimmediate_operand"))
+ (match_operand:V8QI 1 "register_mmxmem_operand"))
(zero_extend:V8HI
- (match_operand:V8QI 2 "nonimmediate_operand")))
+ (match_operand:V8QI 2 "register_mmxmem_operand")))
  (const_vector:V8HI [(const_int 1) (const_int 1)
  (const_int 1) (const_int 1)
  (const_int 1) (const_int 1)
  (const_int 1) (const_int 1)]))
(const_int 1]
-  "TARGET_SSE || TARGET_3DNOW"
+  "(TARGET_MMX || TARGET_MMX_WITH_SSE)
+   && (TARGET_SSE || TARGET_3DNOW_A)"
   "ix86_fixup_binary_operands_no_copy (PLUS, V8QImode, operands);")
 
 (define_insn "*mmx_uavgv8qi3"
-  [(set (match_operand:V8QI 0 "register_operand" "=y")
+  [(set (match_operand:V8QI 0 "register_operand" "=y,x,Yv")
(truncate:V8QI
  (lshiftrt:V8HI
(plus:V8HI
  (plus:V8HI
(zero_extend:V8HI
- (match_operand:V8QI 1 "nonimmediate_operand" "%0"))
+ (match_operand:V8QI 1 "register_mmxmem_operand" "%0,0,Yv"))
(zero_extend:V8HI
- (match_operand:V8QI 2 "nonimmediate_operand" "ym")))
+ (match_operand:V8QI 2 "register_mmxmem_operand" "ym,x,Yv")))
  (const_vector:V8HI [(const_int 1) (const_int 1)
  (const_int 1) (const_int 1)
  (const_int 1) (const_int 1)
  (const_int 1) (const_int 1)]))
(const_int 1]
-  "(TARGET_SSE || TARGET_3DNOW)
+  "(TARGET_MMX || TARGET_MMX_WITH_SSE)
+   && (TARGET_SSE || TARGET_3DNOW_A)
&& ix86_binary_operator_ok (PLUS, V8QImode, operands)"
 {
   /* These two instructions have the same operation, but their encoding
  is different.  Prefer the one that is de facto standard.  */
-  if (TARGET_SSE || TARGET_3DNOW_A)
+  if (TARGET_MMX_WITH_SSE && TARGET_AVX)
+return "vpavgb\t{%2, %1, %0|%0, %1, %2}";
+  else if (TARGET_SSE || TARGET_3DNOW_A)
 return "pavgb\t{%2, %0|%0, %2}";
   else
 return "pavgusb\t{%2, %0|%0, %2}";
 }
-  [(set_attr "type" "mmxshft")
+  [(set_attr "mmx_isa" "native,x64_noavx,x64_avx")
+   (set_attr "type" "mmxshft,sseiadd,sseiadd")
(set (attr "prefix_extra")
  (if_then_else
(not (ior (match_test "TARGET_SSE")
 (match_test "TARGET_3DNOW_A")))
(const_string "1")
(const_string "*")))
-   (set_attr "mode" "DI")])
+   (set_attr "mode" "DI,TI,TI")])
 
 (define_expand "mmx_uavgv4hi3"
   [(set (match_operand:V4HI 0 "register_operand")
-- 
2.20.1



[PATCH 21/41] i386: Emulate MMX maskmovq with SSE2 maskmovdqu

2019-02-16 Thread H.J. Lu
Emulate MMX maskmovq with SSE2 maskmovdqu for TARGET_MMX_WITH_SSE by
zero-extending source and mask operands to 128 bits.  Handle unmapped
bits 64:127 at memory address by adjusting source and mask operands
together with memory address.

PR target/89021
* config/i386/xmmintrin.h: Emulate MMX maskmovq with SSE2
maskmovdqu for __MMX_WITH_SSE__.
---
 gcc/config/i386/xmmintrin.h | 61 +
 1 file changed, 61 insertions(+)

diff --git a/gcc/config/i386/xmmintrin.h b/gcc/config/i386/xmmintrin.h
index 58284378514..a915f6c87d7 100644
--- a/gcc/config/i386/xmmintrin.h
+++ b/gcc/config/i386/xmmintrin.h
@@ -1165,7 +1165,68 @@ _m_pshufw (__m64 __A, int const __N)
 extern __inline void __attribute__((__gnu_inline__, __always_inline__, 
__artificial__))
 _mm_maskmove_si64 (__m64 __A, __m64 __N, char *__P)
 {
+#ifdef __MMX_WITH_SSE__
+  /* Emulate MMX maskmovq with SSE2 maskmovdqu and handle unmapped bits
+ 64:127 at address __P.  */
+  typedef long long __v2di __attribute__ ((__vector_size__ (16)));
+  typedef char __v16qi __attribute__ ((__vector_size__ (16)));
+  /* Zero-extend __A and __N to 128 bits.  */
+  __v2di __A128 = __extension__ (__v2di) { ((__v1di) __A)[0], 0 };
+  __v2di __N128 = __extension__ (__v2di) { ((__v1di) __N)[0], 0 };
+
+  /* Check the alignment of __P.  */
+  __SIZE_TYPE__ offset = ((__SIZE_TYPE__) __P) & 0xf;
+  if (offset)
+{
+  /* If the misalignment of __P > 8, subtract __P by 8 bytes.
+Otherwise, subtract __P by the misalignment.  */
+  if (offset > 8)
+   offset = 8;
+  __P = (char *) (((__SIZE_TYPE__) __P) - offset);
+
+  /* Shift __A128 and __N128 to the left by the adjustment.  */
+  switch (offset)
+   {
+   case 1:
+ __A128 = __builtin_ia32_pslldqi128 (__A128, 8);
+ __N128 = __builtin_ia32_pslldqi128 (__N128, 8);
+ break;
+   case 2:
+ __A128 = __builtin_ia32_pslldqi128 (__A128, 2 * 8);
+ __N128 = __builtin_ia32_pslldqi128 (__N128, 2 * 8);
+ break;
+   case 3:
+ __A128 = __builtin_ia32_pslldqi128 (__A128, 3 * 8);
+ __N128 = __builtin_ia32_pslldqi128 (__N128, 3 * 8);
+ break;
+   case 4:
+ __A128 = __builtin_ia32_pslldqi128 (__A128, 4 * 8);
+ __N128 = __builtin_ia32_pslldqi128 (__N128, 4 * 8);
+ break;
+   case 5:
+ __A128 = __builtin_ia32_pslldqi128 (__A128, 5 * 8);
+ __N128 = __builtin_ia32_pslldqi128 (__N128, 5 * 8);
+ break;
+   case 6:
+ __A128 = __builtin_ia32_pslldqi128 (__A128, 6 * 8);
+ __N128 = __builtin_ia32_pslldqi128 (__N128, 6 * 8);
+ break;
+   case 7:
+ __A128 = __builtin_ia32_pslldqi128 (__A128, 7 * 8);
+ __N128 = __builtin_ia32_pslldqi128 (__N128, 7 * 8);
+ break;
+   case 8:
+ __A128 = __builtin_ia32_pslldqi128 (__A128, 8 * 8);
+ __N128 = __builtin_ia32_pslldqi128 (__N128, 8 * 8);
+ break;
+   default:
+ break;
+   }
+}
+  __builtin_ia32_maskmovdqu ((__v16qi)__A128, (__v16qi)__N128, __P);
+#else
   __builtin_ia32_maskmovq ((__v8qi)__A, (__v8qi)__N, __P);
+#endif
 }
 
 extern __inline void __attribute__((__gnu_inline__, __always_inline__, 
__artificial__))
-- 
2.20.1



[PATCH 23/41] i386: Emulate MMX mmx_uavgv4hi3 with SSE

2019-02-16 Thread H.J. Lu
Emulate MMX mmx_uavgv4hi3 with SSE.  Only SSE register source operand is
allowed.

PR target/89021
* config/i386/mmx.md (mmx_uavgv4hi3): Also check TARGET_MMX and
TARGET_MMX_WITH_SSE.
(*mmx_uavgv4hi3): Add SSE emulation.
---
 gcc/config/i386/mmx.md | 26 --
 1 file changed, 16 insertions(+), 10 deletions(-)

diff --git a/gcc/config/i386/mmx.md b/gcc/config/i386/mmx.md
index 8866354dea9..d647dc28baa 100644
--- a/gcc/config/i386/mmx.md
+++ b/gcc/config/i386/mmx.md
@@ -1736,33 +1736,39 @@
(plus:V4SI
  (plus:V4SI
(zero_extend:V4SI
- (match_operand:V4HI 1 "nonimmediate_operand"))
+ (match_operand:V4HI 1 "register_mmxmem_operand"))
(zero_extend:V4SI
- (match_operand:V4HI 2 "nonimmediate_operand")))
+ (match_operand:V4HI 2 "register_mmxmem_operand")))
  (const_vector:V4SI [(const_int 1) (const_int 1)
  (const_int 1) (const_int 1)]))
(const_int 1]
-  "TARGET_SSE || TARGET_3DNOW_A"
+  "(TARGET_MMX || TARGET_MMX_WITH_SSE)
+   && (TARGET_SSE || TARGET_3DNOW_A)"
   "ix86_fixup_binary_operands_no_copy (PLUS, V4HImode, operands);")
 
 (define_insn "*mmx_uavgv4hi3"
-  [(set (match_operand:V4HI 0 "register_operand" "=y")
+  [(set (match_operand:V4HI 0 "register_operand" "=y,x,Yv")
(truncate:V4HI
  (lshiftrt:V4SI
(plus:V4SI
  (plus:V4SI
(zero_extend:V4SI
- (match_operand:V4HI 1 "nonimmediate_operand" "%0"))
+ (match_operand:V4HI 1 "register_mmxmem_operand" "%0,0,Yv"))
(zero_extend:V4SI
- (match_operand:V4HI 2 "nonimmediate_operand" "ym")))
+ (match_operand:V4HI 2 "register_mmxmem_operand" "ym,x,Yv")))
  (const_vector:V4SI [(const_int 1) (const_int 1)
  (const_int 1) (const_int 1)]))
(const_int 1]
-  "(TARGET_SSE || TARGET_3DNOW_A)
+  "(TARGET_MMX || TARGET_MMX_WITH_SSE)
+   && (TARGET_SSE || TARGET_3DNOW_A)
&& ix86_binary_operator_ok (PLUS, V4HImode, operands)"
-  "pavgw\t{%2, %0|%0, %2}"
-  [(set_attr "type" "mmxshft")
-   (set_attr "mode" "DI")])
+  "@
+   pavgw\t{%2, %0|%0, %2}
+   pavgw\t{%2, %0|%0, %2}
+   vpavgw\t{%2, %1, %0|%0, %1, %2}"
+  [(set_attr "mmx_isa" "native,x64_noavx,x64_avx")
+   (set_attr "type" "mmxshft,sseiadd,sseiadd")
+   (set_attr "mode" "DI,TI,TI")])
 
 (define_insn "mmx_psadbw"
   [(set (match_operand:V1DI 0 "register_operand" "=y")
-- 
2.20.1



Re: C++ PATCH for c++/89217 - ICE with list-initialization in range-based for loop

2019-02-16 Thread Marek Polacek
On Fri, Feb 15, 2019 at 01:59:10PM -1000, Jason Merrill wrote:
> On 2/11/19 6:03 PM, Marek Polacek wrote:
> > On Mon, Feb 11, 2019 at 01:43:36PM -0500, Jason Merrill wrote:
> > > On 2/7/19 6:02 PM, Marek Polacek wrote:
> > > > Since r268321 we can call digest_init even in a template, when the 
> > > > compound
> > > > literal isn't instantiation-dependent.
> > > 
> > > Right.  And since digest_init modifies the CONSTRUCTOR in place, that 
> > > means
> > > the template trees are digested rather than the original parse trees that 
> > > we
> > > try to use.  If we're going to use digest_init, we should probably save
> > > another CONSTRUCTOR with the original trees.
> > 
> > I tried unsharing the constructor and even its contents but only then did I
> > realize that this cannot work.
> 
> Why wouldn't going back to saving {*((struct S *) this)->r} work?

Sorry, I misunderstood what you meant by "saving".  I think I do now.

> > It's not digest_init that adds the problematic
> > INDIRECT_REF via convert_from_reference, it's instantiate_pending_templates
> > -> tsubst_expr -> ... -> finish_non_static_data_member.
> > 
> > So the problem isn't sharing the contents of the CONSTRUCTOR, but rather 
> > what
> > finish_non_static_data_member does with the
> > 
> >{.r=(struct R &) (struct R *) ((struct S *) this)->r}
> > 
> > expression.  The same problem would appear even before r268321 changes if we
> > called tsubst_* twice on the CONSTRUCTOR above.
> 
> Yes, it sounds like there's a bug in that path as well.  Perhaps
> tsubst_copy_and_build/COMPONENT_REF should strip a REFERENCE_REF_P if t was
> already a reference.

With this patch, this seems no longer to be needed.

> > Do you still think digest_init and/or finish_compound_literal need tweaking?
> 
> I imagine that saving post-digest trees might cause other problems, but
> perhaps not.  Perhaps we ought to move away more generally from trying to
> save the original parse trees for non-dependent expressions and messing with
> NON_DEPENDENT_EXPR.

Now that I've spent a lot of time looking into 89356 (and the other PRs broken
by the same revision), I'm convinced that we must return the original tree in
finish_compound_literal.  But we still have to call digest_init or we lose
detecting narrowing conversions.  What happens in that PR is that after
digest_init we lose the braced-init-list and that changes mangling.  I've come
up with this fix for 89356, but it also fixes this PR, and likely all the
others.

The comments hopefully explain what I'm doing and why.  The only suspicious
thing is the get_target_expr_sfinae call, which is there so that initlist109.C
keeps compiling; without the call to get_target_expr_sfinae, we end up
issuing an error in digest_init_r:
1224   if (COMPOUND_LITERAL_P (stripped_init) && code == ARRAY_TYPE)
1225 {
1226   if (complain & tf_error)
1227 error_at (loc, "cannot initialize aggregate of type %qT with "
1228   "a compound literal", type);

But I hope the rest of the patch is reasonable.  The LOOKUP_NO_NARROWING bit
isn't necessary but it should be a correct thing to do, so that later in
perform_implicit_conversion_flags we properly set the recently added flag
IMPLICIT_CONV_EXPR_BRACED_INIT.

Bootstrapped/regtested on x86_64-linux and ppc64le-linux, ok for trunk?

2019-02-16  Marek Polacek  

PR c++/89217 - ICE with list-initialization in range-based for loop.
* constexpr.c (unshare_constructor): No longer static.
* cp-tree.h (unshare_constructor): Declare.
* semantics.c (finish_compound_literal): When dealing with a
non-dependent expression in a template, return the original
expression.  Pass LOOKUP_NO_NARROWING to digest_init_flags.

* g++.dg/cpp0x/range-for37.C: New test.

diff --git gcc/cp/constexpr.c gcc/cp/constexpr.c
index 923763faa0a..d946a797999 100644
--- gcc/cp/constexpr.c
+++ gcc/cp/constexpr.c
@@ -1318,7 +1318,7 @@ find_constructor (tree *tp, int *walk_subtrees, void *)
 /* If T is a CONSTRUCTOR or an expression that has a CONSTRUCTOR node as a
subexpression, return an unshared copy of T.  Otherwise return T.  */
 
-static tree
+tree
 unshare_constructor (tree t)
 {
   tree ctor = walk_tree (&t, find_constructor, NULL, NULL);
diff --git gcc/cp/cp-tree.h gcc/cp/cp-tree.h
index 44a3620a539..60ca1366cf6 100644
--- gcc/cp/cp-tree.h
+++ gcc/cp/cp-tree.h
@@ -7710,6 +7710,7 @@ extern void explain_invalid_constexpr_fn(tree);
 extern vec cx_error_context   (void);
 extern tree fold_sizeof_expr   (tree);
 extern void clear_cv_and_fold_caches   (void);
+extern tree unshare_constructor(tree);
 
 /* In cp-ubsan.c */
 extern void cp_ubsan_maybe_instrument_member_call (tree);
diff --git gcc/cp/semantics.c gcc/cp/semantics.c
index aa5a163dd64..3ecd192bced 100644
--- gcc/cp/semantics.c
+++ gcc/cp/semantics.c
@@ -2796,17 +2796,31 @@ finish_compound_literal (tree t

Re: [RS6000] Fix _ and tf_ splitters

2019-02-16 Thread Alan Modra
On Sat, Feb 16, 2019 at 11:14:11AM -0600, Segher Boessenkool wrote:
> Okay, thanks!  For trunk as well as backports.

Applied to trunk.  gcc-8 is frozen until after the 8.3 release so I'll
have to wait a while for backports.  There is some chance we'll hit
ICEs on 8.3 like we occasionally did when reload used to put a loop
counter in an fpr.

-- 
Alan Modra
Australia Development Lab, IBM


Re: C++ PATCH for c++/89217 - ICE with list-initialization in range-based for loop

2019-02-16 Thread Jason Merrill

On 2/16/19 8:23 PM, Marek Polacek wrote:

On Fri, Feb 15, 2019 at 01:59:10PM -1000, Jason Merrill wrote:

On 2/11/19 6:03 PM, Marek Polacek wrote:

On Mon, Feb 11, 2019 at 01:43:36PM -0500, Jason Merrill wrote:

On 2/7/19 6:02 PM, Marek Polacek wrote:

Since r268321 we can call digest_init even in a template, when the compound
literal isn't instantiation-dependent.


Right.  And since digest_init modifies the CONSTRUCTOR in place, that means
the template trees are digested rather than the original parse trees that we
try to use.  If we're going to use digest_init, we should probably save
another CONSTRUCTOR with the original trees.


I tried unsharing the constructor and even its contents but only then did I
realize that this cannot work.


Why wouldn't going back to saving {*((struct S *) this)->r} work?


Sorry, I misunderstood what you meant by "saving".  I think I do now.


It's not digest_init that adds the problematic
INDIRECT_REF via convert_from_reference, it's instantiate_pending_templates
-> tsubst_expr -> ... -> finish_non_static_data_member.

So the problem isn't sharing the contents of the CONSTRUCTOR, but rather what
finish_non_static_data_member does with the

{.r=(struct R &) (struct R *) ((struct S *) this)->r}

expression.  The same problem would appear even before r268321 changes if we
called tsubst_* twice on the CONSTRUCTOR above.


Yes, it sounds like there's a bug in that path as well.  Perhaps
tsubst_copy_and_build/COMPONENT_REF should strip a REFERENCE_REF_P if t was
already a reference.


With this patch, this seems no longer to be needed.


Do you still think digest_init and/or finish_compound_literal need tweaking?


I imagine that saving post-digest trees might cause other problems, but
perhaps not.  Perhaps we ought to move away more generally from trying to
save the original parse trees for non-dependent expressions and messing with
NON_DEPENDENT_EXPR.


Now that I've spent a lot of time looking into 89356 (and the other PRs broken
by the same revision), I'm convinced that we must return the original tree in
finish_compound_literal.  But we still have to call digest_init or we lose
detecting narrowing conversions.  What happens in that PR is that after
digest_init we lose the braced-init-list and that changes mangling.  I've come
up with this fix for 89356, but it also fixes this PR, and likely all the
others.

The comments hopefully explain what I'm doing and why, the only suspicious
thing is the get_target_expr_sfinae call, that is so that initlist109.C
keeps compiling; without the call to get_target_expr_sfinae, we end up
issuing an error in digest_init_r:
1224   if (COMPOUND_LITERAL_P (stripped_init) && code == ARRAY_TYPE)
1225 {
1226   if (complain & tf_error)
1227 error_at (loc, "cannot initialize aggregate of type %qT with "
1228   "a compound literal", type);

But I hope the rest of the patch is reasonable.  The LOOKUP_NO_NARROWING bit
isn't necessary but it should be a correct thing to do, so that later in
perform_implicit_conversion_flags we properly set the recently added flag
IMPLICIT_CONV_EXPR_BRACED_INIT.

Bootstrapped/regtested on x86_64-linux and ppc64le-linux, ok for trunk?

2019-02-16  Marek Polacek  

PR c++/89217 - ICE with list-initialization in range-based for loop.
* constexpr.c (unshare_constructor): No longer static.
* cp-tree.h (unshare_constructor): Declare.
* semantics.c (finish_compound_literal): When dealing with a
non-dependent expression in a template, return the original
expression.  Pass LOOKUP_NO_NARROWING to digest_init_flags.


OK.

Jason



Re: [Patch] [arm] Fix 88714, Arm LDRD/STRD peepholes

2019-02-16 Thread Jakub Jelinek
On Mon, Feb 11, 2019 at 12:08:32PM +0100, Jakub Jelinek wrote:
> So like the patch below (though, I have only limited possibilities to test
> this, can throw it in armv7hl-linux-gnueabi distro build).

Actually, that patch was bad: I misread the CORE_REGS vs. GENERAL_REGS
hardregset difference.  It is actually sp that is not in GENERAL_REGS but is
in CORE_REGS, not ip.  So here is an updated patch, the same except that in
ldrdstrd.md the q constraints are kept in the right spot.
To repeat, I don't think the q constraints on movdi are needed any more,
because ldrdstrd doesn't use those DImode patterns and RA will not allocate
a DImode hard reg starting at ip because sp is a fixed register.

Bootstrapped/regtested on armv7hl-linux-gnueabi (distro build), ok for
trunk?

2019-02-17  Jakub Jelinek  

PR bootstrap/88714
* config/arm/arm.md (*arm_movdi, *movdf_soft_insn): Use "r" instead of
"q" constraint.
* config/arm/vfp.md (*movdi_vfp): Likewise.
* config/arm/ldrdstrd.md (*arm_ldrd, *arm_strd): Use "r" instead of
"q" constraint for operands[0].

--- gcc/config/arm/arm.md.jj2019-01-31 00:26:04.417738975 +0100
+++ gcc/config/arm/arm.md   2019-02-11 12:02:32.778707056 +0100
@@ -5817,8 +5817,8 @@ (define_expand "movdi"
 )
 
 (define_insn "*arm_movdi"
-  [(set (match_operand:DI 0 "nonimmediate_di_operand" "=r, r, r, q, m")
-   (match_operand:DI 1 "di_operand"  "rDa,Db,Dc,mi,q"))]
+  [(set (match_operand:DI 0 "nonimmediate_di_operand" "=r, r, r, r, m")
+   (match_operand:DI 1 "di_operand"  "rDa,Db,Dc,mi,r"))]
   "TARGET_32BIT
&& !(TARGET_HARD_FLOAT)
&& !TARGET_IWMMXT
@@ -7102,8 +7102,8 @@ (define_expand "reload_outdf"
 )
 
 (define_insn "*movdf_soft_insn"
-  [(set (match_operand:DF 0 "nonimmediate_soft_df_operand" "=r,r,r,q,m")
-   (match_operand:DF 1 "soft_df_operand" "rDa,Db,Dc,mF,q"))]
+  [(set (match_operand:DF 0 "nonimmediate_soft_df_operand" "=r,r,r,r,m")
+   (match_operand:DF 1 "soft_df_operand" "rDa,Db,Dc,mF,r"))]
   "TARGET_32BIT && TARGET_SOFT_FLOAT
&& (   register_operand (operands[0], DFmode)
|| register_operand (operands[1], DFmode))"
--- gcc/config/arm/vfp.md.jj   2019-01-31 00:26:04.312740661 +0100
+++ gcc/config/arm/vfp.md   2019-02-11 12:03:13.232045976 +0100
@@ -307,8 +307,8 @@ (define_insn "*thumb2_movsi_vfp"
 ;; DImode moves
 
 (define_insn "*movdi_vfp"
-  [(set (match_operand:DI 0 "nonimmediate_di_operand" "=r,r,r,r,q,q,m,w,!r,w,w, Uv")
-   (match_operand:DI 1 "di_operand"  "r,rDa,Db,Dc,mi,mi,q,r,w,w,UvTu,w"))]
+  [(set (match_operand:DI 0 "nonimmediate_di_operand" "=r,r,r,r,r,r,m,w,!r,w,w, Uv")
+   (match_operand:DI 1 "di_operand"  "r,rDa,Db,Dc,mi,mi,r,r,w,w,UvTu,w"))]
   "TARGET_32BIT && TARGET_HARD_FLOAT
&& (   register_operand (operands[0], DImode)
|| register_operand (operands[1], DImode))
--- gcc/config/arm/ldrdstrd.md.jj   2019-02-11 11:39:39.977125795 +0100
+++ gcc/config/arm/ldrdstrd.md  2019-02-11 12:03:57.978314745 +0100
@@ -157,7 +157,7 @@
 ;; We use gen_operands_ldrd_strd() with a modify argument as false so that the
 ;; operands are not changed.
 (define_insn "*arm_ldrd"
-  [(parallel [(set (match_operand:SI 0 "s_register_operand" "=q")
+  [(parallel [(set (match_operand:SI 0 "s_register_operand" "=r")
   (match_operand:SI 2 "memory_operand" "m"))
  (set (match_operand:SI 1 "s_register_operand" "=q")
   (match_operand:SI 3 "memory_operand" "m"))])]
@@ -178,7 +178,7 @@
 
 (define_insn "*arm_strd"
   [(parallel [(set (match_operand:SI 2 "memory_operand" "=m")
-  (match_operand:SI 0 "s_register_operand" "q"))
+  (match_operand:SI 0 "s_register_operand" "r"))
  (set (match_operand:SI 3 "memory_operand" "=m")
   (match_operand:SI 1 "s_register_operand" "q"))])]
   "TARGET_LDRD && TARGET_ARM && reload_completed


Jakub