Hello!

There is no point in emitting vmovaps instead of vmovapd or vmovdqa for
TARGET_AVX, since these instructions all have the same size there. The
attached patch fixes this oversight.

2012-05-11  Uros Bizjak  <ubiz...@gmail.com>

        * config/i386/i386.md (*movti_internal_rex64): Avoid MOVAPS size
        optimization for TARGET_AVX.
        (*movti_internal_sse): Ditto.
        (*movdi_internal_rex64): Handle TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL.
        (*movdi_internal): Ditto.
        (*movsi_internal): Ditto.
        (*movtf_internal): Avoid MOVAPS size optimization for TARGET_AVX.
        (*movdf_internal_rex64): Ditto.
        (*movdf_internal): Ditto.
        (*movsf_internal): Ditto.
        * config/i386/sse.md (mov<mode>): Handle TARGET_SSE_LOAD0_BY_PXOR.

Tested on x86_64-pc-linux-gnu {,-m32}, committed to mainline SVN.

Uros.
Index: i386.md
===================================================================
--- i386.md     (revision 187372)
+++ i386.md     (working copy)
@@ -1890,12 +1890,15 @@
    (set (attr "mode")
        (cond [(eq_attr "alternative" "0,1")
                 (const_string "DI")
-              (ior (match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL")
-                   (match_test "optimize_function_for_size_p (cfun)"))
+              (match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL")
                 (const_string "V4SF")
               (and (eq_attr "alternative" "4")
                    (match_test "TARGET_SSE_TYPELESS_STORES"))
                 (const_string "V4SF")
+              (match_test "TARGET_AVX")
+                (const_string "TI")
+              (match_test "optimize_function_for_size_p (cfun)")
+                (const_string "V4SF")
               ]
               (const_string "TI")))])
 
@@ -1943,13 +1946,15 @@
   [(set_attr "type" "sselog1,ssemov,ssemov")
    (set_attr "prefix" "maybe_vex")
    (set (attr "mode")
-       (cond [(ior (match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL")
-                   (match_test "optimize_function_for_size_p (cfun)"))
+       (cond [(match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL")
                 (const_string "V4SF")
               (and (eq_attr "alternative" "2")
                    (match_test "TARGET_SSE_TYPELESS_STORES"))
                 (const_string "V4SF")
-              (not (match_test "TARGET_SSE2"))
+              (match_test "TARGET_AVX")
+                (const_string "TI")
+              (ior (not (match_test "TARGET_SSE2"))
+                   (match_test "optimize_function_for_size_p (cfun)"))
                 (const_string "V4SF")
              ]
              (const_string "TI")))])
@@ -1970,8 +1975,11 @@
        return "movdq2q\t{%1, %0|%0, %1}";
 
     case TYPE_SSEMOV:
-      if (get_attr_mode (insn) == MODE_TI)
+      if (get_attr_mode (insn) == MODE_V4SF)
+       return "%vmovaps\t{%1, %0|%0, %1}";
+      else if (get_attr_mode (insn) == MODE_TI)
        return "%vmovdqa\t{%1, %0|%0, %1}";
+
       /* Handle broken assemblers that require movd instead of movq.  */
       if (GENERAL_REG_P (operands[0]) || GENERAL_REG_P (operands[1]))
        return "%vmovd\t{%1, %0|%0, %1}";
@@ -2048,7 +2056,20 @@
      (if_then_else (eq_attr "alternative" "10,11,12,13,14,15")
        (const_string "maybe_vex")
        (const_string "orig")))
-   (set_attr "mode" "SI,DI,DI,DI,SI,DI,DI,DI,DI,DI,TI,DI,TI,DI,DI,DI,DI,DI")])
+   (set (attr "mode")
+       (cond [(eq_attr "alternative" "0,4")
+                 (const_string "SI")
+              (eq_attr "alternative" "10,12")
+                 (cond [(match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL")
+                          (const_string "V4SF")
+                        (match_test "TARGET_AVX")
+                          (const_string "TI")
+                        (match_test "optimize_function_for_size_p (cfun)")
+                          (const_string "V4SF")
+                       ]
+                       (const_string "TI"))
+             ]
+             (const_string "DI")))])
 
 ;; Reload patterns to support multi-word load/store
 ;; with non-offsetable address.
@@ -2142,7 +2163,7 @@
        case MODE_DI:
           return "%vmovq\t{%1, %0|%0, %1}";
        case MODE_V4SF:
-         return "movaps\t{%1, %0|%0, %1}";
+         return "%vmovaps\t{%1, %0|%0, %1}";
        case MODE_V2SF:
          return "movlps\t{%1, %0|%0, %1}";
        default:
@@ -2189,7 +2210,22 @@
      (if_then_else (eq_attr "alternative" "5,6,7,8")
        (const_string "maybe_vex")
        (const_string "orig")))
-   (set_attr "mode" "DI,DI,DI,DI,DI,TI,DI,TI,DI,V4SF,V2SF,V4SF,V2SF,DI,DI")])
+   (set (attr "mode")
+       (cond [(eq_attr "alternative" "9,11")
+                 (const_string "V4SF")
+              (eq_attr "alternative" "10,12")
+                 (const_string "V2SF")
+              (eq_attr "alternative" "5,7")
+                 (cond [(match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL")
+                          (const_string "V4SF")
+                        (match_test "TARGET_AVX")
+                          (const_string "TI")
+                        (match_test "optimize_function_for_size_p (cfun)")
+                          (const_string "V4SF")
+                       ]
+                       (const_string "TI"))
+             ]
+             (const_string "DI")))])
 
 (define_split
   [(set (match_operand:DI 0 "nonimmediate_operand")
@@ -2271,10 +2307,15 @@
      (cond [(eq_attr "alternative" "2,3")
              (const_string "DI")
            (eq_attr "alternative" "6,7")
-             (if_then_else
-               (not (match_test "TARGET_SSE2"))
-               (const_string "V4SF")
-               (const_string "TI"))
+             (cond [(match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL")
+                      (const_string "V4SF")
+                    (match_test "TARGET_AVX")
+                      (const_string "TI")
+                    (ior (not (match_test "TARGET_SSE2"))
+                         (match_test "optimize_function_for_size_p (cfun)"))
+                      (const_string "V4SF")
+                   ]
+                   (const_string "TI"))
            (and (eq_attr "alternative" "8,9,10,11")
                 (not (match_test "TARGET_SSE2")))
              (const_string "SF")
@@ -2881,12 +2922,15 @@
    (set (attr "mode")
         (cond [(eq_attr "alternative" "3,4")
                 (const_string "DI")
-              (ior (match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL")
-                   (match_test "optimize_function_for_size_p (cfun)"))
+              (match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL")
                 (const_string "V4SF")
               (and (eq_attr "alternative" "2")
                    (match_test "TARGET_SSE_TYPELESS_STORES"))
                 (const_string "V4SF")
+              (match_test "TARGET_AVX")
+                (const_string "TI")
+              (match_test "optimize_function_for_size_p (cfun)")
+                (const_string "V4SF")
               ]
               (const_string "TI")))])
 
@@ -3030,9 +3074,11 @@
               (eq_attr "alternative" "3,4,5,6,11,12")
                 (const_string "DI")
 
-              /* xorps is one byte shorter.  */
+              /* xorps is one byte shorter for !TARGET_AVX.  */
               (eq_attr "alternative" "7")
-                (cond [(match_test "optimize_function_for_size_p (cfun)")
+                (cond [(match_test "TARGET_AVX")
+                         (const_string "V2DF")
+                       (match_test "optimize_function_for_size_p (cfun)")
                          (const_string "V4SF")
                        (match_test "TARGET_SSE_LOAD0_BY_PXOR")
                          (const_string "TI")
@@ -3043,14 +3089,16 @@
                  whole SSE registers use APD move to break dependency
                  chains, otherwise use short move to avoid extra work.
 
-                 movaps encodes one byte shorter.  */
+                 movaps encodes one byte shorter for !TARGET_AVX.  */
               (eq_attr "alternative" "8")
-                (cond
-                  [(ior (match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL")
-                        (match_test "optimize_function_for_size_p (cfun)"))
-                     (const_string "V4SF")
-                   (match_test "TARGET_SSE_PARTIAL_REG_DEPENDENCY")
-                     (const_string "V2DF")
+                (cond [(match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL")
+                         (const_string "V4SF")
+                       (match_test "TARGET_SSE_PARTIAL_REG_DEPENDENCY")
+                         (const_string "V2DF")
+                       (match_test "TARGET_AVX")
+                         (const_string "DF")
+                       (match_test "optimize_function_for_size_p (cfun)")
+                         (const_string "V4SF")
                   ]
                   (const_string "DF"))
               /* For architectures resolving dependencies on register
@@ -3165,9 +3213,11 @@
                   (const_string "V4SF")
                   (const_string "V2SF"))
 
-              /* xorps is one byte shorter.  */
+              /* xorps is one byte shorter for !TARGET_AVX.  */
               (eq_attr "alternative" "5,9")
-                (cond [(match_test "optimize_function_for_size_p (cfun)")
+                (cond [(match_test "TARGET_AVX")
+                         (const_string "V2DF")
+                       (match_test "optimize_function_for_size_p (cfun)")
                          (const_string "V4SF")
                        (match_test "TARGET_SSE_LOAD0_BY_PXOR")
                          (const_string "TI")
@@ -3178,16 +3228,19 @@
                  whole SSE registers use APD move to break dependency
                  chains, otherwise use short move to avoid extra work.
 
-                 movaps encodes one byte shorter.  */
+                 movaps encodes one byte shorter for !TARGET_AVX.  */
               (eq_attr "alternative" "6,10")
-                (cond
-                  [(ior (match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL")
-                        (match_test "optimize_function_for_size_p (cfun)"))
-                     (const_string "V4SF")
-                   (match_test "TARGET_SSE_PARTIAL_REG_DEPENDENCY")
-                     (const_string "V2DF")
+                (cond [(match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL")
+                         (const_string "V4SF")
+                       (match_test "TARGET_SSE_PARTIAL_REG_DEPENDENCY")
+                         (const_string "V2DF")
+                       (match_test "TARGET_AVX")
+                         (const_string "DF")
+                       (match_test "optimize_function_for_size_p (cfun)")
+                         (const_string "V4SF")
                   ]
                   (const_string "DF"))
+
               /* For architectures resolving dependencies on register
                  parts we may avoid extra work to zero out upper part
                  of register.  */
@@ -3277,12 +3330,16 @@
         (cond [(eq_attr "alternative" "3,4,9,10")
                 (const_string "SI")
               (eq_attr "alternative" "5")
-                (if_then_else
-                  (and (and (match_test "TARGET_SSE_LOAD0_BY_PXOR")
-                            (match_test "TARGET_SSE2"))
-                       (not (match_test "optimize_function_for_size_p (cfun)")))
-                  (const_string "TI")
-                  (const_string "V4SF"))
+                (cond [(match_test "TARGET_AVX")
+                         (const_string "V4SF")
+                       (ior (not (match_test "TARGET_SSE2"))
+                            (match_test "optimize_function_for_size_p (cfun)"))
+                         (const_string "V4SF")
+                       (match_test "TARGET_SSE_LOAD0_BY_PXOR")
+                         (const_string "TI")
+                      ]
+                      (const_string "V4SF"))
+
               /* For architectures resolving dependencies on
                  whole SSE registers use APS move to break dependency
                  chains, otherwise use short move to avoid extra work.
Index: sse.md
===================================================================
--- sse.md      (revision 187372)
+++ sse.md      (working copy)
@@ -491,6 +491,9 @@
               (ior (not (match_test "TARGET_SSE2"))
                    (match_test "optimize_function_for_size_p (cfun)"))
                 (const_string "V4SF")
+              (and (eq_attr "alternative" "0")
+                   (match_test "TARGET_SSE_LOAD0_BY_PXOR"))
+                (const_string "TI")
              ]
              (const_string "<sseinsnmode>")))])
 

Reply via email to