Because of POWER9 DD2.1 issues with certain unaligned VSX instructions
accessing cache-inhibited memory, here is a patch that keeps the inline
expansion of memmove (and memcpy) from doing unaligned vector accesses
or using vector loads/stores other than lvx/stvx. The issue is described
in more detail here:

https://patchwork.ozlabs.org/patch/814059/

OK for trunk if bootstrap/regtest ok?

Thanks!
   Aaron

2018-12-19  Aaron Sawdey  <acsaw...@linux.ibm.com>

        * config/rs6000/rs6000-string.c (expand_block_move): Don't use
        unaligned vsx and avoid lxvd2x/stxvd2x.
        (gen_lvx_v4si_move): New function.


Index: gcc/config/rs6000/rs6000-string.c
===================================================================
--- gcc/config/rs6000/rs6000-string.c   (revision 267055)
+++ gcc/config/rs6000/rs6000-string.c   (working copy)
@@ -2669,6 +2669,35 @@
   return true;
 }

+/* Generate a load or a store for a move of v4si mode using lvx/stvx.
+   This uses altivec_{l,st}vx_<mode>_internal which use unspecs to
+   keep combine from changing what instruction gets used.
+   Exactly one of DEST and SRC must be a MEM; the other is V4SImode.
+   DEST is the destination for the data.
+   SRC is the source of the data for the move.  */
+
+static rtx
+gen_lvx_v4si_move (rtx dest, rtx src)
+{
+  rtx rv = NULL;
+  if (MEM_P (dest))  /* Store: DEST is memory, SRC is not.  */
+    {
+      gcc_assert (!MEM_P (src));
+      gcc_assert (GET_MODE (src) == V4SImode);
+      rv = gen_altivec_stvx_v4si_internal (dest, src);
+    }
+  else if (MEM_P (src))  /* Load: SRC is memory, DEST is not.  */
+    {
+      gcc_assert (!MEM_P (dest));
+      gcc_assert (GET_MODE (dest) == V4SImode);
+      rv = gen_altivec_lvx_v4si_internal (dest, src);
+    }
+  else
+    gcc_unreachable ();  /* Neither operand is a MEM.  */
+
+  return rv;
+}
+
 /* Expand a block move operation, and return 1 if successful.  Return 0
    if we should let the compiler generate normal code.

@@ -2721,11 +2750,11 @@

       /* Altivec first, since it will be faster than a string move
         when it applies, and usually not significantly larger.  */
-      if (TARGET_ALTIVEC && bytes >= 16 && (TARGET_EFFICIENT_UNALIGNED_VSX || align >= 128))
+      if (TARGET_ALTIVEC && bytes >= 16 && align >= 128)
        {
          move_bytes = 16;
          mode = V4SImode;
-         gen_func.mov = gen_movv4si;
+         gen_func.mov = gen_lvx_v4si_move;
        }
       else if (bytes >= 8 && TARGET_POWERPC64
               && (align >= 64 || !STRICT_ALIGNMENT))



-- 
Aaron Sawdey, Ph.D.  acsaw...@linux.vnet.ibm.com
050-2/C113  (507) 253-7520 home: 507/263-0782
IBM Linux Technology Center - PPC Toolchain

Reply via email to