Re: [PATCH] i386: Fix GLC tuning with -masm=intel [PR104104]

2022-01-18 Thread Wang, Hongyu via Gcc-patches
Sorry for introducing this failure, and thanks for the patch. I suppose it could
be treated as an obvious fix?

发件人: Jakub Jelinek 
发送时间: 星期三, 一月 19, 2022 8:01 上午
收件人: Hongtao Liu; Uros Bizjak
抄送: gcc-patches@gcc.gnu.org; Wang, Hongyu
主题: [PATCH] i386: Fix GLC tuning with -masm=intel [PR104104]

On Sun, Jan 16, 2022 at 12:22:18PM +0800, Hongtao Liu via Gcc-patches wrote:
> On Sun, Jan 16, 2022 at 12:44 AM Uros Bizjak via Gcc-patches
>  wrote:
> >
> > On Sat, Jan 15, 2022 at 5:39 PM Hongyu Wang  wrote:
> > >
> > > Thanks for the suggestion, here is the updated patch that survived
> > > bootstrap/regtest.
> >
> > LGTM for me, but please get the final approval from Hongtao.
> >
> Ok, thanks.

Unfortunately the patch results in assembler failures with -masm=intel.

> > > > +  if (TARGET_DEST_FALSE_DEPENDENCY
> > > > +  && get_attr_dest_false_dep (insn) ==
> > > > +DEST_FALSE_DEP_TRUE)
> > > > +output_asm_insn ("vxorps\t{%x0, %x0, %x0}", operands);

All the vxorps insns were emitted like the above, which means that for -masm=att
it looks like
vxorps  %xmm3, %xmm3, %xmm3
but for -masm=intel like:
vxorps
Obviously we want
vxorps  xmm3, xmm3, xmm3
so the following patch just drops the erroneous {}s.

Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk?

2022-01-19  Jakub Jelinek  

PR target/104104
* config/i386/sse.md
(__,

avx512fp16_sh_v8hf,
avx512dq_mul3, _permvar,
avx2_perm_1, avx512f_perm_1,
avx512dq_rangep,
avx512dq_ranges,
_getmant,
avx512f_vgetmant):
Use vxorps\t%x0, %x0, %x0 instead of vxorps\t{%x0, %x0, %x0}.

* gcc.target/i386/pr104104.c: New test.

--- gcc/config/i386/sse.md.jj   2022-01-18 11:58:59.156988142 +0100
+++ gcc/config/i386/sse.md  2022-01-18 21:20:40.02248 +0100
@@ -6539,7 +6539,7 @@ (define_insn "__<
 {
   if (TARGET_DEST_FALSE_DEP_FOR_GLC
   && )
-output_asm_insn ("vxorps\t{%x0, %x0, %x0}", operands);
+output_asm_insn ("vxorps\t%x0, %x0, %x0", operands);
   return "v\t{%2, %1, 
%0|%0, %1, %2}";
 }
   [(set_attr "type" "ssemul")
@@ -6750,7 +6750,7 @@ (define_insn "avx512fp16_
 {
   if (TARGET_DEST_FALSE_DEP_FOR_GLC
   && )
-output_asm_insn ("vxorps\t{%x0, %x0, %x0}", operands);
+output_asm_insn ("vxorps\t%x0, %x0, %x0", operands);
   return "vsh\t{%2, %1, 
%0|%0, %1, 
%2}";
 }
   [(set_attr "type" "ssemul")
@@ -15222,7 +15222,7 @@ (define_insn "avx512dq_mul3
   && !reg_mentioned_p (operands[0], operands[1])
   && !reg_mentioned_p (operands[0], operands[2]))
-output_asm_insn ("vxorps\t{%x0, %x0, %x0}", operands);
+output_asm_insn ("vxorps\t%x0, %x0, %x0", operands);
   return "vpmullq\t{%2, %1, %0|%0, %1, %2}";
 }
   [(set_attr "type" "sseimul")
@@ -24658,7 +24658,7 @@ (define_insn "_permvar
   && !reg_mentioned_p (operands[0], operands[1])
   && !reg_mentioned_p (operands[0], operands[2]))
-output_asm_insn ("vxorps\t{%x0, %x0, %x0}", operands);
+output_asm_insn ("vxorps\t%x0, %x0, %x0", operands);
   return "vperm\t{%1, %2, %0|%0, 
%2, %1}";
 }
   [(set_attr "type" "sselog")
@@ -24900,7 +24900,7 @@ (define_insn "avx2_perm_1
   && !reg_mentioned_p (operands[0], operands[1]))
-output_asm_insn ("vxorps\t{%x0, %x0, %x0}", operands);
+output_asm_insn ("vxorps\t%x0, %x0, %x0", operands);
   return "vperm\t{%2, %1, %0|%0, 
%1, %2}";
 }
   [(set_attr "type" "sselog")
@@ -24975,7 +24975,7 @@ (define_insn "avx512f_perm_1
   && !reg_mentioned_p (operands[0], operands[1]))
-output_asm_insn ("vxorps\t{%x0, %x0, %x0}", operands);
+output_asm_insn ("vxorps\t%x0, %x0, %x0", operands);
   return "vperm\t{%2, %1, 
%0|%0, %1, %2}";
 }
   [(set_attr "type" "sselog")
@@ -26880,7 +26880,7 @@ (define_insn "avx512dq_rangep
   && !reg_mentioned_p (operands[0], operands[1])
   && !reg_mentioned_p (operands[0], operands[2]))
-output_asm_insn ("vxorps\t{%x0, %x0, %x0}", operands);
+output_asm_insn ("vxorps\t%x0, %x0, %x0", operands);
   return "vrange\t{%3, %2, %1, 
%0|%0, %1, %2, %3}";
 }
   [(set_attr "type" "sse")
@@ -26903,7 +26903,7 @@ (define_insn "avx512dq_ranges
   && !reg_mentioned_p (operands[0], operands[1])
   && !reg_mentioned_p (operands[0], operands[2]))
-output_asm_insn ("vxorps\t{%x0, %x0, %x0}", operands);
+output_asm_insn ("vxorps\t%x0, %x0, %x0", operands);
   return "vrange\t{%3, %2, 
%1, %0|%0, %1, 
%2, %3}";
 }
   [(set_attr "type" "sse")
@@ -26949,7 +26949,7 @@ (define_insn "_getmant
   && MEM_P (operands[1]))
-output_asm_insn ("vxorps\t{%x0, %x0, %x0}", operands);
+output_asm_insn ("vxorps\t%x0, %x0, %x0", operands);
   return "vgetmant\t{%2, %1, 
%0|%0, %1, %2}";
 }
   [(set_attr "prefix" "evex")
@@ -26971,7 +26971,7 @@ (define_insn "avx512f_vgetmant
   && !reg_mentioned_p (operands[0], operands[1])
   && !reg_mentioned_p (operands[0], operands[2]))
-output_asm_insn ("vxorps

RE: [PATCH V2] Enable small loop unrolling for O2

2022-11-10 Thread Wang, Hongyu via Gcc-patches
Thanks for the notification! I wasn’t aware of the compile farm before. I will
check what the impact of my patch is then.

Regards,
Hongyu, Wang

From: David Edelsohn 
Sent: Thursday, November 10, 2022 1:22 AM
To: Wang, Hongyu 
Cc: GCC Patches 
Subject: Re: [PATCH V2] Enable small loop unrolling for O2

> This patch does not change rs6000/s390 since I don't have machines to
> test them, but I suppose the default behavior is the same since they
> enable flag_unroll_loops at O2.

There are Power (rs6000) systems in the Compile Farm.

Trial Linux on Z (s390x) VMs are available through the Linux Community Cloud.
https://linuxone.cloud.marist.edu/#/register?flag=VM

Thanks, David