Hi!

At least the library routines for the memset, memcpy and memmove builtins
can't deal with non-generic address spaces, so we shouldn't transform loops
that set or copy to and/or from such address spaces into these builtins.

Bootstrapped/regtested on x86_64-linux and i686-linux, preapproved by
Richard in the PR, committed to trunk.

2017-02-21  Jakub Jelinek  <ja...@redhat.com>

        PR tree-optimization/79649
        * tree-loop-distribution.c (classify_partition): Give up on
        non-generic address space loads/stores.

        * gcc.target/i386/pr79649.c: New test.

--- gcc/tree-loop-distribution.c.jj     2017-01-30 09:31:47.000000000 +0100
+++ gcc/tree-loop-distribution.c        2017-02-21 09:31:52.484838050 +0100
@@ -1072,6 +1072,13 @@ classify_partition (loop_p loop, struct
       /* But exactly one store and/or load.  */
       for (j = 0; RDG_DATAREFS (rdg, i).iterate (j, &dr); ++j)
        {
+         tree type = TREE_TYPE (DR_REF (dr));
+
+         /* The memset, memcpy and memmove library calls are only
+            able to deal with generic address space.  */
+         if (!ADDR_SPACE_GENERIC_P (TYPE_ADDR_SPACE (type)))
+           return;
+
          if (DR_IS_READ (dr))
            {
              if (single_load != NULL)
--- gcc/testsuite/gcc.target/i386/pr79649.c.jj  2017-02-21 09:49:51.404547952 +0100
+++ gcc/testsuite/gcc.target/i386/pr79649.c     2017-02-21 09:49:01.000000000 +0100
@@ -0,0 +1,53 @@
+/* PR tree-optimization/79649 */
+/* { dg-do compile } */
+/* { dg-options "-O3 -fdump-tree-optimized" } */
+/* { dg-final { scan-tree-dump-not "__builtin_memset" "optimized" } } */
+/* { dg-final { scan-tree-dump-not "__builtin_memcpy" "optimized" } } */
+/* { dg-final { scan-tree-dump-not "__builtin_memmove" "optimized" } } */
+
+typedef __SIZE_TYPE__ size_t;
+
+void
+f1 (unsigned char __seg_gs *s, size_t n)
+{
+  for (size_t i = 0; i < n; ++i)
+    s[i] = 0;
+}
+
+void
+f2 (unsigned char __seg_gs *__restrict d, unsigned char __seg_gs *__restrict s, size_t n)
+{
+  for (size_t i = 0; i < n; ++i)
+    d[i] = s[i];
+}
+
+void
+f3 (unsigned char __seg_gs *__restrict d, unsigned char *__restrict s, size_t n)
+{
+  for (size_t i = 0; i < n; ++i)
+    d[i] = s[i];
+}
+
+void
+f4 (unsigned char *__restrict d, unsigned char __seg_gs *__restrict s, size_t n)
+{
+  for (size_t i = 0; i < n; ++i)
+    d[i] = s[i];
+}
+
+void
+f5 (unsigned char __seg_gs *__restrict d, unsigned char __seg_fs *__restrict s, size_t n)
+{
+  for (size_t i = 0; i < n; ++i)
+    d[i] = s[i];
+}
+
+struct A { int a; char b[1024]; };
+extern struct A __seg_gs a;
+
+void
+f6 (size_t n)
+{
+  for (size_t i = 0; i < n; ++i)
+    a.b[i] = 0;
+}

        Jakub

Reply via email to