Hi,

ISA 3.0 adds the lxvh8x, lxvb16x, stxvh8x, and stxvb16x instructions,
which perform vector loads and stores in big-endian element order
regardless of the target endianness.  These join the similar lxvd2x,
lxvw4x, stxvd2x, and stxvw4x instructions introduced in ISA 2.06.  The
existing instructions have been used in several ways, but we do not yet
have built-ins that allow them to be generated explicitly on
little-endian targets.  This patch corrects that, and adds built-ins
for the new ISA 3.0 instructions as well.
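
For reference, the new built-ins take the same (offset, address)
argument form as the existing lxvd2x/stxvd2x built-ins.  The v2df
variants are prototyped as follows (parameter names are mine, for
exposition only); the other modes are analogous:

  vector double __builtin_vsx_ld_elemrev_v2df (long off, const void *addr);
  void __builtin_vsx_st_elemrev_v2df (vector double val, long off, void *addr);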

Note that the behaviors of lxvd2x, lxvw4x, lxvh8x, and lxvb16x are
indistinguishable from one another in big-endian mode, and similarly for
the stores.  So on big-endian we can treat these as simple moves that
will generate any applicable load or store (such as lxvx and stxvx for
ISA 3.0).  For little-endian, however, we require a separate pattern for
each of these loads and stores, to ensure that we get the correct
element-reversal semantics for each vector mode.
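
As a quick illustration of the little-endian semantics (values and
names chosen only for exposition), the reversing load returns the
elements in the opposite order from a plain vector load:

  extern vector signed int *vsip;  /* points to { 1, 2, 3, 4 } in memory */
  vector signed int x = __builtin_vsx_ld_elemrev_v4si (0, vsip);
  /* On little-endian, x is { 4, 3, 2, 1 }: the big-endian element
     order that lxvw4x produces.  On big-endian this is a plain load.  */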

(While working on this patch, I happened to notice that the existing
entries in rs6000-builtin.def for STXVD2X_<MODE> and STXVW4X_<MODE> are
mapped to stxsdx instead of stxvd2x/stxvw4x.  I took the opportunity to
correct that as an obvious bug.)

I've added four new tests to demonstrate correct behavior of the new
built-in functions.  These include variants for big- and little-endian,
and variants for -mcpu=power8 and -mcpu=power9.

Bootstrapped and tested on powerpc64-unknown-linux-gnu and
powerpc64le-unknown-linux-gnu with no regressions.  Is this ok for
trunk following the GCC 6 release?

Thanks,
Bill


[gcc]

2016-04-24  Bill Schmidt  <wschm...@linux.vnet.ibm.com>

        * config/rs6000/rs6000-builtin.def (STXVD2X_V1TI): Fix target
        built-in function name.
        (STXVD2X_V2DF): Likewise.
        (STXVD2X_V2DI): Likewise.
        (STXVW4X_V4SF): Likewise.
        (STXVW4X_V4SI): Likewise.
        (STXVW4X_V8HI): Likewise.
        (STXVW4X_V16QI): Likewise.
        (LD_ELEMREV_V2DF): New.
        (LD_ELEMREV_V2DI): New.
        (LD_ELEMREV_V4SF): New.
        (LD_ELEMREV_V4SI): New.
        (LD_ELEMREV_V8HI): New.
        (LD_ELEMREV_V16QI): New.
        (ST_ELEMREV_V2DF): New.
        (ST_ELEMREV_V2DI): New.
        (ST_ELEMREV_V4SF): New.
        (ST_ELEMREV_V4SI): New.
        (ST_ELEMREV_V8HI): New.
        (ST_ELEMREV_V16QI): New.
        * config/rs6000/rs6000.c (altivec_expand_builtin): Add handling
        for VSX_BUILTIN_ST_ELEMREV_<MODE> and
        VSX_BUILTIN_LD_ELEMREV_<MODE>.
        (altivec_init_builtins): Likewise.
        * config/rs6000/vsx.md (vsx_ld_elemrev_v2di): New define_insn.
        (vsx_ld_elemrev_v2df): Likewise.
        (vsx_ld_elemrev_v4sf): Likewise.
        (vsx_ld_elemrev_v4si): Likewise.
        (vsx_ld_elemrev_v8hi): Likewise.
        (vsx_ld_elemrev_v16qi): Likewise.
        (vsx_st_elemrev_v2df): Likewise.
        (vsx_st_elemrev_v2di): Likewise.
        (vsx_st_elemrev_v4sf): Likewise.
        (vsx_st_elemrev_v4si): Likewise.
        (vsx_st_elemrev_v8hi): Likewise.
        (vsx_st_elemrev_v16qi): Likewise.

[gcc/testsuite]

2016-04-24  Bill Schmidt  <wschm...@linux.vnet.ibm.com>

        * gcc.target/powerpc/vsx-elemrev-1.c: New.
        * gcc.target/powerpc/vsx-elemrev-2.c: New.
        * gcc.target/powerpc/vsx-elemrev-3.c: New.
        * gcc.target/powerpc/vsx-elemrev-4.c: New.


diff --git a/gcc/config/rs6000/rs6000-builtin.def b/gcc/config/rs6000/rs6000-builtin.def
index 5b82b00..aa87633 100644
--- a/gcc/config/rs6000/rs6000-builtin.def
+++ b/gcc/config/rs6000/rs6000-builtin.def
@@ -1391,13 +1391,25 @@ BU_VSX_X (LXVW4X_V4SI,        "lxvw4x_v4si",    MEM)
 BU_VSX_X (LXVW4X_V8HI,        "lxvw4x_v8hi",   MEM)
 BU_VSX_X (LXVW4X_V16QI,              "lxvw4x_v16qi",   MEM)
 BU_VSX_X (STXSDX,            "stxsdx",         MEM)
-BU_VSX_X (STXVD2X_V1TI,              "stxsdx_v1ti",    MEM)
-BU_VSX_X (STXVD2X_V2DF,              "stxsdx_v2df",    MEM)
-BU_VSX_X (STXVD2X_V2DI,              "stxsdx_v2di",    MEM)
-BU_VSX_X (STXVW4X_V4SF,              "stxsdx_v4sf",    MEM)
-BU_VSX_X (STXVW4X_V4SI,              "stxsdx_v4si",    MEM)
-BU_VSX_X (STXVW4X_V8HI,              "stxsdx_v8hi",    MEM)
-BU_VSX_X (STXVW4X_V16QI,      "stxsdx_v16qi",  MEM)
+BU_VSX_X (STXVD2X_V1TI,              "stxvd2x_v1ti",   MEM)
+BU_VSX_X (STXVD2X_V2DF,              "stxvd2x_v2df",   MEM)
+BU_VSX_X (STXVD2X_V2DI,              "stxvd2x_v2di",   MEM)
+BU_VSX_X (STXVW4X_V4SF,              "stxvw4x_v4sf",   MEM)
+BU_VSX_X (STXVW4X_V4SI,              "stxvw4x_v4si",   MEM)
+BU_VSX_X (STXVW4X_V8HI,              "stxvw4x_v8hi",   MEM)
+BU_VSX_X (STXVW4X_V16QI,      "stxvw4x_v16qi", MEM)
+BU_VSX_X (LD_ELEMREV_V2DF,    "ld_elemrev_v2df",  MEM)
+BU_VSX_X (LD_ELEMREV_V2DI,    "ld_elemrev_v2di",  MEM)
+BU_VSX_X (LD_ELEMREV_V4SF,    "ld_elemrev_v4sf",  MEM)
+BU_VSX_X (LD_ELEMREV_V4SI,    "ld_elemrev_v4si",  MEM)
+BU_VSX_X (LD_ELEMREV_V8HI,    "ld_elemrev_v8hi",  MEM)
+BU_VSX_X (LD_ELEMREV_V16QI,   "ld_elemrev_v16qi", MEM)
+BU_VSX_X (ST_ELEMREV_V2DF,    "st_elemrev_v2df",  MEM)
+BU_VSX_X (ST_ELEMREV_V2DI,    "st_elemrev_v2di",  MEM)
+BU_VSX_X (ST_ELEMREV_V4SF,    "st_elemrev_v4sf",  MEM)
+BU_VSX_X (ST_ELEMREV_V4SI,    "st_elemrev_v4si",  MEM)
+BU_VSX_X (ST_ELEMREV_V8HI,    "st_elemrev_v8hi",  MEM)
+BU_VSX_X (ST_ELEMREV_V16QI,   "st_elemrev_v16qi", MEM)
 BU_VSX_X (XSABSDP,           "xsabsdp",        CONST)
 BU_VSX_X (XSADDDP,           "xsadddp",        FP)
 BU_VSX_X (XSCMPODP,          "xscmpodp",       FP)
diff --git a/gcc/config/rs6000/rs6000.c b/gcc/config/rs6000/rs6000.c
index 1d0076c..1a6e016 100644
--- a/gcc/config/rs6000/rs6000.c
+++ b/gcc/config/rs6000/rs6000.c
@@ -14129,6 +14129,47 @@ altivec_expand_builtin (tree exp, rtx target, bool *expandedp)
     case VSX_BUILTIN_STXVW4X_V16QI:
       return altivec_expand_stv_builtin (CODE_FOR_vsx_store_v16qi, exp);
 
+    /* For the following on big endian, it's ok to use any appropriate
+       unaligned-supporting store, so use a generic expander.  For
+       little-endian, the exact element-reversing instruction must
+       be used.  */
+    case VSX_BUILTIN_ST_ELEMREV_V2DF:
+      {
+       enum insn_code code = (BYTES_BIG_ENDIAN ? CODE_FOR_vsx_store_v2df
+                              : CODE_FOR_vsx_st_elemrev_v2df);
+       return altivec_expand_stv_builtin (code, exp);
+      }
+    case VSX_BUILTIN_ST_ELEMREV_V2DI:
+      {
+       enum insn_code code = (BYTES_BIG_ENDIAN ? CODE_FOR_vsx_store_v2di
+                              : CODE_FOR_vsx_st_elemrev_v2di);
+       return altivec_expand_stv_builtin (code, exp);
+      }
+    case VSX_BUILTIN_ST_ELEMREV_V4SF:
+      {
+       enum insn_code code = (BYTES_BIG_ENDIAN ? CODE_FOR_vsx_store_v4sf
+                              : CODE_FOR_vsx_st_elemrev_v4sf);
+       return altivec_expand_stv_builtin (code, exp);
+      }
+    case VSX_BUILTIN_ST_ELEMREV_V4SI:
+      {
+       enum insn_code code = (BYTES_BIG_ENDIAN ? CODE_FOR_vsx_store_v4si
+                              : CODE_FOR_vsx_st_elemrev_v4si);
+       return altivec_expand_stv_builtin (code, exp);
+      }
+    case VSX_BUILTIN_ST_ELEMREV_V8HI:
+      {
+       enum insn_code code = (BYTES_BIG_ENDIAN ? CODE_FOR_vsx_store_v8hi
+                              : CODE_FOR_vsx_st_elemrev_v8hi);
+       return altivec_expand_stv_builtin (code, exp);
+      }
+    case VSX_BUILTIN_ST_ELEMREV_V16QI:
+      {
+       enum insn_code code = (BYTES_BIG_ENDIAN ? CODE_FOR_vsx_store_v16qi
+                              : CODE_FOR_vsx_st_elemrev_v16qi);
+       return altivec_expand_stv_builtin (code, exp);
+      }
+
     case ALTIVEC_BUILTIN_MFVSCR:
       icode = CODE_FOR_altivec_mfvscr;
       tmode = insn_data[icode].operand[0].mode;
@@ -14323,6 +14364,46 @@ altivec_expand_builtin (tree exp, rtx target, bool *expandedp)
     case VSX_BUILTIN_LXVW4X_V16QI:
       return altivec_expand_lv_builtin (CODE_FOR_vsx_load_v16qi,
                                        exp, target, false);
+    /* For the following on big endian, it's ok to use any appropriate
+       unaligned-supporting load, so use a generic expander.  For
+       little-endian, the exact element-reversing instruction must
+       be used.  */
+    case VSX_BUILTIN_LD_ELEMREV_V2DF:
+      {
+       enum insn_code code = (BYTES_BIG_ENDIAN ? CODE_FOR_vsx_load_v2df
+                              : CODE_FOR_vsx_ld_elemrev_v2df);
+       return altivec_expand_lv_builtin (code, exp, target, false);
+      }
+    case VSX_BUILTIN_LD_ELEMREV_V2DI:
+      {
+       enum insn_code code = (BYTES_BIG_ENDIAN ? CODE_FOR_vsx_load_v2di
+                              : CODE_FOR_vsx_ld_elemrev_v2di);
+       return altivec_expand_lv_builtin (code, exp, target, false);
+      }
+    case VSX_BUILTIN_LD_ELEMREV_V4SF:
+      {
+       enum insn_code code = (BYTES_BIG_ENDIAN ? CODE_FOR_vsx_load_v4sf
+                              : CODE_FOR_vsx_ld_elemrev_v4sf);
+       return altivec_expand_lv_builtin (code, exp, target, false);
+      }
+    case VSX_BUILTIN_LD_ELEMREV_V4SI:
+      {
+       enum insn_code code = (BYTES_BIG_ENDIAN ? CODE_FOR_vsx_load_v4si
+                              : CODE_FOR_vsx_ld_elemrev_v4si);
+       return altivec_expand_lv_builtin (code, exp, target, false);
+      }
+    case VSX_BUILTIN_LD_ELEMREV_V8HI:
+      {
+       enum insn_code code = (BYTES_BIG_ENDIAN ? CODE_FOR_vsx_load_v8hi
+                              : CODE_FOR_vsx_ld_elemrev_v8hi);
+       return altivec_expand_lv_builtin (code, exp, target, false);
+      }
+    case VSX_BUILTIN_LD_ELEMREV_V16QI:
+      {
+       enum insn_code code = (BYTES_BIG_ENDIAN ? CODE_FOR_vsx_load_v16qi
+                              : CODE_FOR_vsx_ld_elemrev_v16qi);
+       return altivec_expand_lv_builtin (code, exp, target, false);
+      }
       break;
     default:
       break;
@@ -15816,6 +15897,36 @@ altivec_init_builtins (void)
               VSX_BUILTIN_STXVW4X_V8HI);
   def_builtin ("__builtin_vsx_stxvw4x_v16qi", void_ftype_v16qi_long_pvoid,
               VSX_BUILTIN_STXVW4X_V16QI);
+
+  def_builtin ("__builtin_vsx_ld_elemrev_v2df", v2df_ftype_long_pcvoid,
+              VSX_BUILTIN_LD_ELEMREV_V2DF);
+  def_builtin ("__builtin_vsx_ld_elemrev_v2di", v2di_ftype_long_pcvoid,
+              VSX_BUILTIN_LD_ELEMREV_V2DI);
+  def_builtin ("__builtin_vsx_ld_elemrev_v4sf", v4sf_ftype_long_pcvoid,
+              VSX_BUILTIN_LD_ELEMREV_V4SF);
+  def_builtin ("__builtin_vsx_ld_elemrev_v4si", v4si_ftype_long_pcvoid,
+              VSX_BUILTIN_LD_ELEMREV_V4SI);
+  def_builtin ("__builtin_vsx_st_elemrev_v2df", void_ftype_v2df_long_pvoid,
+              VSX_BUILTIN_ST_ELEMREV_V2DF);
+  def_builtin ("__builtin_vsx_st_elemrev_v2di", void_ftype_v2di_long_pvoid,
+              VSX_BUILTIN_ST_ELEMREV_V2DI);
+  def_builtin ("__builtin_vsx_st_elemrev_v4sf", void_ftype_v4sf_long_pvoid,
+              VSX_BUILTIN_ST_ELEMREV_V4SF);
+  def_builtin ("__builtin_vsx_st_elemrev_v4si", void_ftype_v4si_long_pvoid,
+              VSX_BUILTIN_ST_ELEMREV_V4SI);
+
+  if (TARGET_P9_VECTOR)
+    {
+      def_builtin ("__builtin_vsx_ld_elemrev_v8hi", v8hi_ftype_long_pcvoid,
+                  VSX_BUILTIN_LD_ELEMREV_V8HI);
+      def_builtin ("__builtin_vsx_ld_elemrev_v16qi", v16qi_ftype_long_pcvoid,
+                  VSX_BUILTIN_LD_ELEMREV_V16QI);
+      def_builtin ("__builtin_vsx_st_elemrev_v8hi",
+                  void_ftype_v8hi_long_pvoid, VSX_BUILTIN_ST_ELEMREV_V8HI);
+      def_builtin ("__builtin_vsx_st_elemrev_v16qi",
+                  void_ftype_v16qi_long_pvoid, VSX_BUILTIN_ST_ELEMREV_V16QI);
+    }
+
   def_builtin ("__builtin_vec_vsx_ld", opaque_ftype_long_pcvoid,
               VSX_BUILTIN_VEC_LD);
   def_builtin ("__builtin_vec_vsx_st", void_ftype_opaque_long_pvoid,
diff --git a/gcc/config/rs6000/vsx.md b/gcc/config/rs6000/vsx.md
index 45af233..508eeac 100644
--- a/gcc/config/rs6000/vsx.md
+++ b/gcc/config/rs6000/vsx.md
@@ -887,6 +887,140 @@
   "VECTOR_MEM_VSX_P (<MODE>mode)"
   "")
 
+;; Explicit load/store expanders for the builtin functions for lxvd2x, etc.,
+;; when you really want their element-reversing behavior.
+(define_insn "vsx_ld_elemrev_v2di"
+  [(set (match_operand:V2DI 0 "vsx_register_operand" "=wa")
+        (vec_select:V2DI
+         (match_operand:V2DI 1 "memory_operand" "Z")
+         (parallel [(const_int 1) (const_int 0)])))]
+  "VECTOR_MEM_VSX_P (V2DImode) && !BYTES_BIG_ENDIAN"
+  "lxvd2x %x0,%y1"
+  [(set_attr "type" "vecload")])
+
+(define_insn "vsx_ld_elemrev_v2df"
+  [(set (match_operand:V2DF 0 "vsx_register_operand" "=wa")
+        (vec_select:V2DF
+         (match_operand:V2DF 1 "memory_operand" "Z")
+         (parallel [(const_int 1) (const_int 0)])))]
+  "VECTOR_MEM_VSX_P (V2DFmode) && !BYTES_BIG_ENDIAN"
+  "lxvd2x %x0,%y1"
+  [(set_attr "type" "vecload")])
+
+(define_insn "vsx_ld_elemrev_v4si"
+  [(set (match_operand:V4SI 0 "vsx_register_operand" "=wa")
+        (vec_select:V4SI
+         (match_operand:V4SI 1 "memory_operand" "Z")
+         (parallel [(const_int 3) (const_int 2)
+                    (const_int 1) (const_int 0)])))]
+  "VECTOR_MEM_VSX_P (V4SImode) && !BYTES_BIG_ENDIAN"
+  "lxvw4x %x0,%y1"
+  [(set_attr "type" "vecload")])
+
+(define_insn "vsx_ld_elemrev_v4sf"
+  [(set (match_operand:V4SF 0 "vsx_register_operand" "=wa")
+        (vec_select:V4SF
+         (match_operand:V4SF 1 "memory_operand" "Z")
+         (parallel [(const_int 3) (const_int 2)
+                    (const_int 1) (const_int 0)])))]
+  "VECTOR_MEM_VSX_P (V4SFmode) && !BYTES_BIG_ENDIAN"
+  "lxvw4x %x0,%y1"
+  [(set_attr "type" "vecload")])
+
+(define_insn "vsx_ld_elemrev_v8hi"
+  [(set (match_operand:V8HI 0 "vsx_register_operand" "=wa")
+        (vec_select:V8HI
+         (match_operand:V8HI 1 "memory_operand" "Z")
+         (parallel [(const_int 7) (const_int 6)
+                    (const_int 5) (const_int 4)
+                    (const_int 3) (const_int 2)
+                    (const_int 1) (const_int 0)])))]
+  "VECTOR_MEM_VSX_P (V8HImode) && !BYTES_BIG_ENDIAN && TARGET_P9_VECTOR"
+  "lxvh8x %x0,%y1"
+  [(set_attr "type" "vecload")])
+
+(define_insn "vsx_ld_elemrev_v16qi"
+  [(set (match_operand:V16QI 0 "vsx_register_operand" "=wa")
+        (vec_select:V16QI
+         (match_operand:V16QI 1 "memory_operand" "Z")
+         (parallel [(const_int 15) (const_int 14)
+                    (const_int 13) (const_int 12)
+                    (const_int 11) (const_int 10)
+                    (const_int  9) (const_int  8)
+                    (const_int  7) (const_int  6)
+                    (const_int  5) (const_int  4)
+                    (const_int  3) (const_int  2)
+                    (const_int  1) (const_int  0)])))]
+  "VECTOR_MEM_VSX_P (V16QImode) && !BYTES_BIG_ENDIAN && TARGET_P9_VECTOR"
+  "lxvb16x %x0,%y1"
+  [(set_attr "type" "vecload")])
+
+(define_insn "vsx_st_elemrev_v2df"
+  [(set (match_operand:V2DF 0 "memory_operand" "=Z")
+        (vec_select:V2DF
+         (match_operand:V2DF 1 "vsx_register_operand" "wa")
+         (parallel [(const_int 1) (const_int 0)])))]
+  "VECTOR_MEM_VSX_P (V2DFmode) && !BYTES_BIG_ENDIAN"
+  "stxvd2x %x1,%y0"
+  [(set_attr "type" "vecstore")])
+
+(define_insn "vsx_st_elemrev_v2di"
+  [(set (match_operand:V2DI 0 "memory_operand" "=Z")
+        (vec_select:V2DI
+         (match_operand:V2DI 1 "vsx_register_operand" "wa")
+         (parallel [(const_int 1) (const_int 0)])))]
+  "VECTOR_MEM_VSX_P (V2DImode) && !BYTES_BIG_ENDIAN"
+  "stxvd2x %x1,%y0"
+  [(set_attr "type" "vecstore")])
+
+(define_insn "vsx_st_elemrev_v4sf"
+  [(set (match_operand:V4SF 0 "memory_operand" "=Z")
+        (vec_select:V4SF
+         (match_operand:V4SF 1 "vsx_register_operand" "wa")
+         (parallel [(const_int 3) (const_int 2)
+                    (const_int 1) (const_int 0)])))]
+  "VECTOR_MEM_VSX_P (V4SFmode) && !BYTES_BIG_ENDIAN"
+  "stxvw4x %x1,%y0"
+  [(set_attr "type" "vecstore")])
+
+(define_insn "vsx_st_elemrev_v4si"
+  [(set (match_operand:V4SI 0 "memory_operand" "=Z")
+        (vec_select:V4SI
+         (match_operand:V4SI 1 "vsx_register_operand" "wa")
+         (parallel [(const_int 3) (const_int 2)
+                    (const_int 1) (const_int 0)])))]
+  "VECTOR_MEM_VSX_P (V4SImode) && !BYTES_BIG_ENDIAN"
+  "stxvw4x %x1,%y0"
+  [(set_attr "type" "vecstore")])
+
+(define_insn "vsx_st_elemrev_v8hi"
+  [(set (match_operand:V8HI 0 "memory_operand" "=Z")
+        (vec_select:V8HI
+         (match_operand:V8HI 1 "vsx_register_operand" "wa")
+         (parallel [(const_int 7) (const_int 6)
+                    (const_int 5) (const_int 4)
+                    (const_int 3) (const_int 2)
+                    (const_int 1) (const_int 0)])))]
+  "VECTOR_MEM_VSX_P (V8HImode) && !BYTES_BIG_ENDIAN && TARGET_P9_VECTOR"
+  "stxvh8x %x1,%y0"
+  [(set_attr "type" "vecstore")])
+
+(define_insn "vsx_st_elemrev_v16qi"
+  [(set (match_operand:V16QI 0 "memory_operand" "=Z")
+        (vec_select:V16QI
+         (match_operand:V16QI 1 "vsx_register_operand" "wa")
+         (parallel [(const_int 15) (const_int 14)
+                    (const_int 13) (const_int 12)
+                    (const_int 11) (const_int 10)
+                    (const_int  9) (const_int  8)
+                    (const_int  7) (const_int  6)
+                    (const_int  5) (const_int  4)
+                    (const_int  3) (const_int  2)
+                    (const_int  1) (const_int  0)])))]
+  "VECTOR_MEM_VSX_P (V16QImode) && !BYTES_BIG_ENDIAN && TARGET_P9_VECTOR"
+  "stxvb16x %x1,%y0"
+  [(set_attr "type" "vecstore")])
+
 
 ;; VSX vector floating point arithmetic instructions.  The VSX scalar
 ;; instructions are now combined with the insn for the traditional floating
diff --git a/gcc/testsuite/gcc.target/powerpc/vsx-elemrev-1.c b/gcc/testsuite/gcc.target/powerpc/vsx-elemrev-1.c
new file mode 100644
index 0000000..15fffcf
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/vsx-elemrev-1.c
@@ -0,0 +1,60 @@
+/* { dg-do compile { target { powerpc64le*-*-* } } } */
+/* { dg-skip-if "do not override mcpu" { powerpc*-*-* } { "-mcpu=*" } { "-mcpu=power8" } } */
+/* { dg-options "-mcpu=power8 -O0" } */
+/* { dg-final { scan-assembler-times "lxvd2x" 6 } } */
+/* { dg-final { scan-assembler-times "lxvw4x" 2 } } */
+/* { dg-final { scan-assembler-times "stxvd2x" 6 } } */
+/* { dg-final { scan-assembler-times "stxvw4x" 2 } } */
+/* { dg-final { scan-assembler-times "xxpermdi" 8 } } */
+
+extern vector double vd, *vdp;
+extern vector signed long long vsll, *vsllp;
+extern vector unsigned long long vull, *vullp;
+extern vector float vf, *vfp;
+extern vector signed int vsi, *vsip;
+extern vector unsigned int vui, *vuip;
+extern vector signed short vss, *vssp;
+extern vector unsigned short vus, *vusp;
+extern vector signed char vsc, *vscp;
+extern vector unsigned char vuc, *vucp;
+
+void foo0 (void)
+{
+  vd = __builtin_vsx_ld_elemrev_v2df (0, vdp);
+}
+
+void foo1 (void)
+{
+  vsll = __builtin_vsx_ld_elemrev_v2di (0, vsllp);
+}
+
+void foo2 (void)
+{
+  vf = __builtin_vsx_ld_elemrev_v4sf (0, vfp);
+}
+
+void foo3 (void)
+{
+  vsi = __builtin_vsx_ld_elemrev_v4si (0, vsip);
+}
+
+void foo6 (void)
+{
+  __builtin_vsx_st_elemrev_v2df (vd, 0, vdp);
+}
+
+void foo7 (void)
+{
+  __builtin_vsx_st_elemrev_v2di (vsll, 0, vsllp);
+}
+
+void foo8 (void)
+{
+  __builtin_vsx_st_elemrev_v4sf (vf, 0, vfp);
+}
+
+void foo9 (void)
+{
+  __builtin_vsx_st_elemrev_v4si (vsi, 0, vsip);
+}
+
diff --git a/gcc/testsuite/gcc.target/powerpc/vsx-elemrev-2.c b/gcc/testsuite/gcc.target/powerpc/vsx-elemrev-2.c
new file mode 100644
index 0000000..331a696
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/vsx-elemrev-2.c
@@ -0,0 +1,83 @@
+/* { dg-do compile { target { powerpc64le*-*-* } } } */
+/* { dg-skip-if "do not override mcpu" { powerpc*-*-* } { "-mcpu=*" } { "-mcpu=power9" } } */
+/* { dg-options "-mcpu=power9 -O0" } */
+/* { dg-final { scan-assembler-times "lxvd2x" 2 } } */
+/* { dg-final { scan-assembler-times "lxvw4x" 2 } } */
+/* { dg-final { scan-assembler-times "lxvh8x" 1 } } */
+/* { dg-final { scan-assembler-times "lxvb16x" 1 } } */
+/* { dg-final { scan-assembler-times "stxvd2x" 2 } } */
+/* { dg-final { scan-assembler-times "stxvw4x" 2 } } */
+/* { dg-final { scan-assembler-times "stxvh8x" 1 } } */
+/* { dg-final { scan-assembler-times "stxvb16x" 1 } } */
+
+extern vector double vd, *vdp;
+extern vector signed long long vsll, *vsllp;
+extern vector unsigned long long vull, *vullp;
+extern vector float vf, *vfp;
+extern vector signed int vsi, *vsip;
+extern vector unsigned int vui, *vuip;
+extern vector signed short vss, *vssp;
+extern vector unsigned short vus, *vusp;
+extern vector signed char vsc, *vscp;
+extern vector unsigned char vuc, *vucp;
+
+void foo0 (void)
+{
+  vd = __builtin_vsx_ld_elemrev_v2df (0, vdp);
+}
+
+void foo1 (void)
+{
+  vsll = __builtin_vsx_ld_elemrev_v2di (0, vsllp);
+}
+
+void foo2 (void)
+{
+  vf = __builtin_vsx_ld_elemrev_v4sf (0, vfp);
+}
+
+void foo3 (void)
+{
+  vsi = __builtin_vsx_ld_elemrev_v4si (0, vsip);
+}
+
+void foo4 (void)
+{
+  vss = __builtin_vsx_ld_elemrev_v8hi (0, vssp);
+}
+
+void foo5 (void)
+{
+  vsc = __builtin_vsx_ld_elemrev_v16qi (0, vscp);
+}
+
+void foo6 (void)
+{
+  __builtin_vsx_st_elemrev_v2df (vd, 0, vdp);
+}
+
+void foo7 (void)
+{
+  __builtin_vsx_st_elemrev_v2di (vsll, 0, vsllp);
+}
+
+void foo8 (void)
+{
+  __builtin_vsx_st_elemrev_v4sf (vf, 0, vfp);
+}
+
+void foo9 (void)
+{
+  __builtin_vsx_st_elemrev_v4si (vsi, 0, vsip);
+}
+
+void foo10 (void)
+{
+  __builtin_vsx_st_elemrev_v8hi (vss, 0, vssp);
+}
+
+void foo11 (void)
+{
+  __builtin_vsx_st_elemrev_v16qi (vsc, 0, vscp);
+}
+
diff --git a/gcc/testsuite/gcc.target/powerpc/vsx-elemrev-3.c b/gcc/testsuite/gcc.target/powerpc/vsx-elemrev-3.c
new file mode 100644
index 0000000..5c39e43
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/vsx-elemrev-3.c
@@ -0,0 +1,59 @@
+/* { dg-do compile { target { powerpc64-*-* } } } */
+/* { dg-skip-if "do not override mcpu" { powerpc*-*-* } { "-mcpu=*" } { "-mcpu=power8" } } */
+/* { dg-options "-mcpu=power8 -O0" } */
+/* { dg-final { scan-assembler-times "lxvd2x" 6 } } */
+/* { dg-final { scan-assembler-times "lxvw4x" 2 } } */
+/* { dg-final { scan-assembler-times "stxvd2x" 6 } } */
+/* { dg-final { scan-assembler-times "stxvw4x" 2 } } */
+
+extern vector double vd, *vdp;
+extern vector signed long long vsll, *vsllp;
+extern vector unsigned long long vull, *vullp;
+extern vector float vf, *vfp;
+extern vector signed int vsi, *vsip;
+extern vector unsigned int vui, *vuip;
+extern vector signed short vss, *vssp;
+extern vector unsigned short vus, *vusp;
+extern vector signed char vsc, *vscp;
+extern vector unsigned char vuc, *vucp;
+
+void foo0 (void)
+{
+  vd = __builtin_vsx_ld_elemrev_v2df (0, vdp);
+}
+
+void foo1 (void)
+{
+  vsll = __builtin_vsx_ld_elemrev_v2di (0, vsllp);
+}
+
+void foo2 (void)
+{
+  vf = __builtin_vsx_ld_elemrev_v4sf (0, vfp);
+}
+
+void foo3 (void)
+{
+  vsi = __builtin_vsx_ld_elemrev_v4si (0, vsip);
+}
+
+void foo6 (void)
+{
+  __builtin_vsx_st_elemrev_v2df (vd, 0, vdp);
+}
+
+void foo7 (void)
+{
+  __builtin_vsx_st_elemrev_v2di (vsll, 0, vsllp);
+}
+
+void foo8 (void)
+{
+  __builtin_vsx_st_elemrev_v4sf (vf, 0, vfp);
+}
+
+void foo9 (void)
+{
+  __builtin_vsx_st_elemrev_v4si (vsi, 0, vsip);
+}
+
diff --git a/gcc/testsuite/gcc.target/powerpc/vsx-elemrev-4.c b/gcc/testsuite/gcc.target/powerpc/vsx-elemrev-4.c
new file mode 100644
index 0000000..2d80a02
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/vsx-elemrev-4.c
@@ -0,0 +1,77 @@
+/* { dg-do compile { target { powerpc64-*-* } } } */
+/* { dg-skip-if "do not override mcpu" { powerpc*-*-* } { "-mcpu=*" } { "-mcpu=power9" } } */
+/* { dg-options "-mcpu=power9 -O0" } */
+/* { dg-final { scan-assembler-times "lxvx" 12 } } */
+/* { dg-final { scan-assembler-times "stxvx" 12 } } */
+
+extern vector double vd, *vdp;
+extern vector signed long long vsll, *vsllp;
+extern vector unsigned long long vull, *vullp;
+extern vector float vf, *vfp;
+extern vector signed int vsi, *vsip;
+extern vector unsigned int vui, *vuip;
+extern vector signed short vss, *vssp;
+extern vector unsigned short vus, *vusp;
+extern vector signed char vsc, *vscp;
+extern vector unsigned char vuc, *vucp;
+
+void foo0 (void)
+{
+  vd = __builtin_vsx_ld_elemrev_v2df (0, vdp);
+}
+
+void foo1 (void)
+{
+  vsll = __builtin_vsx_ld_elemrev_v2di (0, vsllp);
+}
+
+void foo2 (void)
+{
+  vf = __builtin_vsx_ld_elemrev_v4sf (0, vfp);
+}
+
+void foo3 (void)
+{
+  vsi = __builtin_vsx_ld_elemrev_v4si (0, vsip);
+}
+
+void foo4 (void)
+{
+  vss = __builtin_vsx_ld_elemrev_v8hi (0, vssp);
+}
+
+void foo5 (void)
+{
+  vsc = __builtin_vsx_ld_elemrev_v16qi (0, vscp);
+}
+
+void foo6 (void)
+{
+  __builtin_vsx_st_elemrev_v2df (vd, 0, vdp);
+}
+
+void foo7 (void)
+{
+  __builtin_vsx_st_elemrev_v2di (vsll, 0, vsllp);
+}
+
+void foo8 (void)
+{
+  __builtin_vsx_st_elemrev_v4sf (vf, 0, vfp);
+}
+
+void foo9 (void)
+{
+  __builtin_vsx_st_elemrev_v4si (vsi, 0, vsip);
+}
+
+void foo10 (void)
+{
+  __builtin_vsx_st_elemrev_v8hi (vss, 0, vssp);
+}
+
+void foo11 (void)
+{
+  __builtin_vsx_st_elemrev_v16qi (vsc, 0, vscp);
+}
+

