Hi,

When I created the patch to split VSX loads and stores to add permutes
so the register image was correctly little endian, I forgot to implement
a known requirement.  When a VSX store is split after reload, we must
reuse the source register for the permute, which leaves it in a swapped
state after the store.  If the register remains live, this is invalid.
After the store, we need to permute the register back to its original
value.

For each of the little endian VSX store patterns, I've replaced the
define_insn_and_split with a define_insn and two define_splits, one for
prior to reload and one for after reload.  The post-reload split has the
extra behavior.

I don't know of a way to set the insn's length attribute conditionally,
so I'm optimistically setting this to 8, though it will be 12 in the
post-reload case.  Is there any concern with that?  Is there a way to
make it fully accurate?

Bootstrapped and tested on powerpc64{,le}-unknown-linux-gnu with no
regressions.  This fixes two failing test cases.  Is this ok for trunk?

Thanks!
Bill


2013-11-02  Bill Schmidt  <wschm...@linux.vnet.ibm.com>

        * config/rs6000/vsx.md (*vsx_le_perm_store_<mode> for VSX_D):
        Replace the define_insn_and_split with a define_insn and two
        define_splits, with the split after reload re-permuting the source
        register to its original value.
        (*vsx_le_perm_store_<mode> for VSX_W): Likewise.
        (*vsx_le_perm_store_v8hi): Likewise.
        (*vsx_le_perm_store_v16qi): Likewise.


Index: gcc/config/rs6000/vsx.md
===================================================================
--- gcc/config/rs6000/vsx.md    (revision 204192)
+++ gcc/config/rs6000/vsx.md    (working copy)
@@ -333,12 +333,18 @@
   [(set_attr "type" "vecload")
    (set_attr "length" "8")])
 
-(define_insn_and_split "*vsx_le_perm_store_<mode>"
+(define_insn "*vsx_le_perm_store_<mode>"
   [(set (match_operand:VSX_D 0 "memory_operand" "=Z")
         (match_operand:VSX_D 1 "vsx_register_operand" "+wa"))]
   "!BYTES_BIG_ENDIAN && TARGET_VSX"
   "#"
-  "!BYTES_BIG_ENDIAN && TARGET_VSX"
+  [(set_attr "type" "vecstore")
+   (set_attr "length" "8")])
+
+(define_split
+  [(set (match_operand:VSX_D 0 "memory_operand" "")
+        (match_operand:VSX_D 1 "vsx_register_operand" ""))]
+  "!BYTES_BIG_ENDIAN && TARGET_VSX && !reload_completed"
   [(set (match_dup 2)
         (vec_select:<MODE>
           (match_dup 1)
@@ -347,21 +353,43 @@
         (vec_select:<MODE>
           (match_dup 2)
           (parallel [(const_int 1) (const_int 0)])))]
-  "
 {
   operands[2] = can_create_pseudo_p () ? gen_reg_rtx_and_attrs (operands[1]) 
                                        : operands[1];
-}
-  "
-  [(set_attr "type" "vecstore")
-   (set_attr "length" "8")])
+})
 
-(define_insn_and_split "*vsx_le_perm_store_<mode>"
+;; The post-reload split requires that we re-permute the source
+;; register in case it is still live.
+(define_split
+  [(set (match_operand:VSX_D 0 "memory_operand" "")
+        (match_operand:VSX_D 1 "vsx_register_operand" ""))]
+  "!BYTES_BIG_ENDIAN && TARGET_VSX && reload_completed"
+  [(set (match_dup 1)
+        (vec_select:<MODE>
+          (match_dup 1)
+          (parallel [(const_int 1) (const_int 0)])))
+   (set (match_dup 0)
+        (vec_select:<MODE>
+          (match_dup 1)
+          (parallel [(const_int 1) (const_int 0)])))
+   (set (match_dup 1)
+        (vec_select:<MODE>
+          (match_dup 1)
+          (parallel [(const_int 1) (const_int 0)])))]
+  "")
+
+(define_insn "*vsx_le_perm_store_<mode>"
   [(set (match_operand:VSX_W 0 "memory_operand" "=Z")
         (match_operand:VSX_W 1 "vsx_register_operand" "+wa"))]
   "!BYTES_BIG_ENDIAN && TARGET_VSX"
   "#"
-  "!BYTES_BIG_ENDIAN && TARGET_VSX"
+  [(set_attr "type" "vecstore")
+   (set_attr "length" "8")])
+
+(define_split
+  [(set (match_operand:VSX_W 0 "memory_operand" "")
+        (match_operand:VSX_W 1 "vsx_register_operand" ""))]
+  "!BYTES_BIG_ENDIAN && TARGET_VSX && !reload_completed"
   [(set (match_dup 2)
         (vec_select:<MODE>
           (match_dup 1)
@@ -372,21 +400,46 @@
           (match_dup 2)
           (parallel [(const_int 2) (const_int 3)
                     (const_int 0) (const_int 1)])))]
-  "
 {
   operands[2] = can_create_pseudo_p () ? gen_reg_rtx_and_attrs (operands[1]) 
                                        : operands[1];
-}
-  "
-  [(set_attr "type" "vecstore")
-   (set_attr "length" "8")])
+})
 
-(define_insn_and_split "*vsx_le_perm_store_v8hi"
+;; The post-reload split requires that we re-permute the source
+;; register in case it is still live.
+(define_split
+  [(set (match_operand:VSX_W 0 "memory_operand" "")
+        (match_operand:VSX_W 1 "vsx_register_operand" ""))]
+  "!BYTES_BIG_ENDIAN && TARGET_VSX && reload_completed"
+  [(set (match_dup 1)
+        (vec_select:<MODE>
+          (match_dup 1)
+          (parallel [(const_int 2) (const_int 3)
+                    (const_int 0) (const_int 1)])))
+   (set (match_dup 0)
+        (vec_select:<MODE>
+          (match_dup 1)
+          (parallel [(const_int 2) (const_int 3)
+                    (const_int 0) (const_int 1)])))
+   (set (match_dup 1)
+        (vec_select:<MODE>
+          (match_dup 1)
+          (parallel [(const_int 2) (const_int 3)
+                    (const_int 0) (const_int 1)])))]
+  "")
+
+(define_insn "*vsx_le_perm_store_v8hi"
   [(set (match_operand:V8HI 0 "memory_operand" "=Z")
         (match_operand:V8HI 1 "vsx_register_operand" "+wa"))]
   "!BYTES_BIG_ENDIAN && TARGET_VSX"
   "#"
-  "!BYTES_BIG_ENDIAN && TARGET_VSX"
+  [(set_attr "type" "vecstore")
+   (set_attr "length" "8")])
+
+(define_split
+  [(set (match_operand:V8HI 0 "memory_operand" "")
+        (match_operand:V8HI 1 "vsx_register_operand" ""))]
+  "!BYTES_BIG_ENDIAN && TARGET_VSX && !reload_completed"
   [(set (match_dup 2)
         (vec_select:V8HI
           (match_dup 1)
@@ -401,21 +454,52 @@
                      (const_int 6) (const_int 7)
                      (const_int 0) (const_int 1)
                      (const_int 2) (const_int 3)])))]
-  "
 {
   operands[2] = can_create_pseudo_p () ? gen_reg_rtx_and_attrs (operands[1]) 
                                        : operands[1];
-}
-  "
-  [(set_attr "type" "vecstore")
-   (set_attr "length" "8")])
+})
 
-(define_insn_and_split "*vsx_le_perm_store_v16qi"
+;; The post-reload split requires that we re-permute the source
+;; register in case it is still live.
+(define_split
+  [(set (match_operand:V8HI 0 "memory_operand" "")
+        (match_operand:V8HI 1 "vsx_register_operand" ""))]
+  "!BYTES_BIG_ENDIAN && TARGET_VSX && reload_completed"
+  [(set (match_dup 1)
+        (vec_select:V8HI
+          (match_dup 1)
+          (parallel [(const_int 4) (const_int 5)
+                     (const_int 6) (const_int 7)
+                     (const_int 0) (const_int 1)
+                     (const_int 2) (const_int 3)])))
+   (set (match_dup 0)
+        (vec_select:V8HI
+          (match_dup 1)
+          (parallel [(const_int 4) (const_int 5)
+                     (const_int 6) (const_int 7)
+                     (const_int 0) (const_int 1)
+                     (const_int 2) (const_int 3)])))
+   (set (match_dup 1)
+        (vec_select:V8HI
+          (match_dup 1)
+          (parallel [(const_int 4) (const_int 5)
+                     (const_int 6) (const_int 7)
+                     (const_int 0) (const_int 1)
+                     (const_int 2) (const_int 3)])))]
+  "")
+
+(define_insn "*vsx_le_perm_store_v16qi"
   [(set (match_operand:V16QI 0 "memory_operand" "=Z")
         (match_operand:V16QI 1 "vsx_register_operand" "+wa"))]
   "!BYTES_BIG_ENDIAN && TARGET_VSX"
   "#"
-  "!BYTES_BIG_ENDIAN && TARGET_VSX"
+  [(set_attr "type" "vecstore")
+   (set_attr "length" "8")])
+
+(define_split
+  [(set (match_operand:V16QI 0 "memory_operand" "")
+        (match_operand:V16QI 1 "vsx_register_operand" ""))]
+  "!BYTES_BIG_ENDIAN && TARGET_VSX && !reload_completed"
   [(set (match_dup 2)
         (vec_select:V16QI
           (match_dup 1)
@@ -438,16 +522,53 @@
                      (const_int 2) (const_int 3)
                      (const_int 4) (const_int 5)
                      (const_int 6) (const_int 7)])))]
-  "
 {
   operands[2] = can_create_pseudo_p () ? gen_reg_rtx_and_attrs (operands[1]) 
                                        : operands[1];
-}
-  "
-  [(set_attr "type" "vecstore")
-   (set_attr "length" "8")])
+})
 
+;; The post-reload split requires that we re-permute the source
+;; register in case it is still live.
+(define_split
+  [(set (match_operand:V16QI 0 "memory_operand" "")
+        (match_operand:V16QI 1 "vsx_register_operand" ""))]
+  "!BYTES_BIG_ENDIAN && TARGET_VSX && reload_completed"
+  [(set (match_dup 1)
+        (vec_select:V16QI
+          (match_dup 1)
+          (parallel [(const_int 8) (const_int 9)
+                     (const_int 10) (const_int 11)
+                     (const_int 12) (const_int 13)
+                     (const_int 14) (const_int 15)
+                     (const_int 0) (const_int 1)
+                     (const_int 2) (const_int 3)
+                     (const_int 4) (const_int 5)
+                     (const_int 6) (const_int 7)])))
+   (set (match_dup 0)
+        (vec_select:V16QI
+          (match_dup 1)
+          (parallel [(const_int 8) (const_int 9)
+                     (const_int 10) (const_int 11)
+                     (const_int 12) (const_int 13)
+                     (const_int 14) (const_int 15)
+                     (const_int 0) (const_int 1)
+                     (const_int 2) (const_int 3)
+                     (const_int 4) (const_int 5)
+                     (const_int 6) (const_int 7)])))
+   (set (match_dup 1)
+        (vec_select:V16QI
+          (match_dup 1)
+          (parallel [(const_int 8) (const_int 9)
+                     (const_int 10) (const_int 11)
+                     (const_int 12) (const_int 13)
+                     (const_int 14) (const_int 15)
+                     (const_int 0) (const_int 1)
+                     (const_int 2) (const_int 3)
+                     (const_int 4) (const_int 5)
+                     (const_int 6) (const_int 7)])))]
+  "")
 
+
 (define_insn "*vsx_mov<mode>"
   [(set (match_operand:VSX_M 0 "nonimmediate_operand" 
"=Z,<VSr>,<VSr>,?Z,?wa,?wa,wQ,?&r,??Y,??r,??r,<VSr>,?wa,*r,v,wZ, v")
        (match_operand:VSX_M 1 "input_operand" 
"<VSr>,Z,<VSr>,wa,Z,wa,r,wQ,r,Y,r,j,j,j,W,v,wZ"))]


Reply via email to