[PATCH, i386]: Fix PR 70998, ICE in pre_and_rev_post_order_compute, at cfganal.c

Uros Bizjak Sun, 08 May 2016 11:33:01 -0700

Hello!

As exposed by r235906 [1], we should not widen DFmode memory access to
V2DFmode in the splitter.


The attached patch introduces two new patterns that use the correct mode
for the memory operand. These two patterns are appropriate for the
TARGET_SSE_PARTIAL_REG_DEPENDENCY splitters, as they don't need to
widen the memory access.

2016-05-08  Uros Bizjak  <ubiz...@gmail.com>

    PR target/70998
    * config/i386/sse.md (*sse2_vd_cvtsd2ss): New insn pattern.
    (*sse2_vd_cvtss2sd): Ditto.
    * config/i386/i386.md
    (TARGET_SSE_PARTIAL_REG_DEPENDENCY float_truncate df->sf splitter):
    Generate *sse2_vd_cvtsd2ss pattern.
    (TARGET_SSE_PARTIAL_REG_DEPENDENCY float_extend sf->df splitter):
    Generate *sse2_vd_cvtss2sd pattern.

Patch was bootstrapped and regression tested on x86_64-linux-gnu {,-m32}.

Committed to mainline SVN.

[1] https://gcc.gnu.org/viewcvs/gcc?view=revision&revision=235906

Uros.

Index: config/i386/i386.md
===================================================================
--- config/i386/i386.md (revision 236007)
+++ config/i386/i386.md (working copy)
@@ -5192,13 +5192,12 @@
   [(set (match_dup 0)
        (vec_merge:V4SF
          (vec_duplicate:V4SF
-           (float_truncate:V2SF
+           (float_truncate:SF
              (match_dup 1)))
          (match_dup 0)
          (const_int 1)))]
 {
   operands[0] = lowpart_subreg (V4SFmode, operands[0], SFmode);
-  operands[1] = lowpart_subreg (V2DFmode, operands[1], DFmode);
   emit_move_insn (operands[0], CONST0_RTX (V4SFmode));
 })
 
@@ -5219,15 +5218,13 @@
        || TARGET_AVX512VL)"
   [(set (match_dup 0)
         (vec_merge:V2DF
-          (float_extend:V2DF
-            (vec_select:V2SF
-              (match_dup 1)
-              (parallel [(const_int 0) (const_int 1)])))
-          (match_dup 0)
+         (vec_duplicate:V2DF
+           (float_extend:DF
+             (match_dup 1)))
+         (match_dup 0)
           (const_int 1)))]
 {
   operands[0] = lowpart_subreg (V2DFmode, operands[0], DFmode);
-  operands[1] = lowpart_subreg (V4SFmode, operands[1], SFmode);
   emit_move_insn (operands[0], CONST0_RTX (V2DFmode));
 })
 
Index: config/i386/sse.md
===================================================================
--- config/i386/sse.md  (revision 236007)
+++ config/i386/sse.md  (working copy)
@@ -4949,6 +4949,27 @@
    (set_attr "prefix" "orig,orig,<round_prefix>")
    (set_attr "mode" "SF")])
 
+(define_insn "*sse2_vd_cvtsd2ss"
+  [(set (match_operand:V4SF 0 "register_operand" "=x,x,v")
+       (vec_merge:V4SF
+         (vec_duplicate:V4SF
+           (float_truncate:SF (match_operand:DF 2 "nonimmediate_operand" "x,m,vm")))
+         (match_operand:V4SF 1 "register_operand" "0,0,v")
+         (const_int 1)))]
+  "TARGET_SSE2"
+  "@
+   cvtsd2ss\t{%2, %0|%0, %2}
+   cvtsd2ss\t{%2, %0|%0, %2}
+   vcvtsd2ss\t{%2, %1, %0|%0, %1, %2}"
+  [(set_attr "isa" "noavx,noavx,avx")
+   (set_attr "type" "ssecvt")
+   (set_attr "athlon_decode" "vector,double,*")
+   (set_attr "amdfam10_decode" "vector,double,*")
+   (set_attr "bdver1_decode" "direct,direct,*")
+   (set_attr "btver2_decode" "double,double,double")
+   (set_attr "prefix" "orig,orig,vex")
+   (set_attr "mode" "SF")])
+
 (define_insn "sse2_cvtss2sd<round_saeonly_name>"
   [(set (match_operand:V2DF 0 "register_operand" "=x,x,v")
        (vec_merge:V2DF
@@ -4972,6 +4993,27 @@
    (set_attr "prefix" "orig,orig,<round_saeonly_prefix>")
    (set_attr "mode" "DF")])
 
+(define_insn "*sse2_vd_cvtss2sd"
+  [(set (match_operand:V2DF 0 "register_operand" "=x,x,v")
+       (vec_merge:V2DF
+         (vec_duplicate:V2DF
+           (float_extend:DF (match_operand:SF 2 "nonimmediate_operand" "x,m,vm")))
+         (match_operand:V2DF 1 "register_operand" "0,0,v")
+         (const_int 1)))]
+  "TARGET_SSE2"
+  "@
+   cvtss2sd\t{%2, %0|%0, %2}
+   cvtss2sd\t{%2, %0|%0, %2}
+   vcvtss2sd\t{%2, %1, %0|%0, %1, %2}"
+  [(set_attr "isa" "noavx,noavx,avx")
+   (set_attr "type" "ssecvt")
+   (set_attr "amdfam10_decode" "vector,double,*")
+   (set_attr "athlon_decode" "direct,direct,*")
+   (set_attr "bdver1_decode" "direct,direct,*")
+   (set_attr "btver2_decode" "double,double,double")
+   (set_attr "prefix" "orig,orig,vex")
+   (set_attr "mode" "DF")])
+
 (define_insn "<mask_codefor>avx512f_cvtpd2ps512<mask_name><round_name>"
   [(set (match_operand:V8SF 0 "register_operand" "=v")
        (float_truncate:V8SF

[PATCH, i386]: Fix PR 70998, ICE in pre_and_rev_post_order_compute, at cfganal.c

Reply via email to