Hi! At least the library routines for the memset, memcpy and memmove builtins can't deal with non-generic address spaces, so we shouldn't transform loops that set memory in, or copy to and/or from, such address spaces into these builtins.
Bootstrapped/regtested on x86_64-linux and i686-linux, preapproved by Richard in the PR, committed to trunk. 2017-02-21 Jakub Jelinek <ja...@redhat.com> PR tree-optimization/79649 * tree-loop-distribution.c (classify_partition): Give up on non-generic address space loads/stores. * gcc.target/i386/pr79649.c: New test. --- gcc/tree-loop-distribution.c.jj 2017-01-30 09:31:47.000000000 +0100 +++ gcc/tree-loop-distribution.c 2017-02-21 09:31:52.484838050 +0100 @@ -1072,6 +1072,13 @@ classify_partition (loop_p loop, struct /* But exactly one store and/or load. */ for (j = 0; RDG_DATAREFS (rdg, i).iterate (j, &dr); ++j) { + tree type = TREE_TYPE (DR_REF (dr)); + + /* The memset, memcpy and memmove library calls are only + able to deal with generic address space. */ + if (!ADDR_SPACE_GENERIC_P (TYPE_ADDR_SPACE (type))) + return; + if (DR_IS_READ (dr)) { if (single_load != NULL) --- gcc/testsuite/gcc.target/i386/pr79649.c.jj 2017-02-21 09:49:51.404547952 +0100 +++ gcc/testsuite/gcc.target/i386/pr79649.c 2017-02-21 09:49:01.000000000 +0100 @@ -0,0 +1,53 @@ +/* PR tree-optimization/79649 */ +/* { dg-do compile } */ +/* { dg-options "-O3 -fdump-tree-optimized" } */ +/* { dg-final { scan-tree-dump-not "__builtin_memset" "optimized" } } */ +/* { dg-final { scan-tree-dump-not "__builtin_memcpy" "optimized" } } */ +/* { dg-final { scan-tree-dump-not "__builtin_memmove" "optimized" } } */ + +typedef __SIZE_TYPE__ size_t; + +void +f1 (unsigned char __seg_gs *s, size_t n) +{ + for (size_t i = 0; i < n; ++i) + s[i] = 0; +} + +void +f2 (unsigned char __seg_gs *__restrict d, unsigned char __seg_gs *__restrict s, size_t n) +{ + for (size_t i = 0; i < n; ++i) + d[i] = s[i]; +} + +void +f3 (unsigned char __seg_gs *__restrict d, unsigned char *__restrict s, size_t n) +{ + for (size_t i = 0; i < n; ++i) + d[i] = s[i]; +} + +void +f4 (unsigned char *__restrict d, unsigned char __seg_gs *__restrict s, size_t n) +{ + for (size_t i = 0; i < n; ++i) + d[i] = s[i]; +} + +void +f5 (unsigned char 
__seg_gs *__restrict d, unsigned char __seg_fs *__restrict s, size_t n) +{ + for (size_t i = 0; i < n; ++i) + d[i] = s[i]; +} + +struct A { int a; char b[1024]; }; +extern struct A __seg_gs a; + +void +f6 (size_t n) +{ + for (size_t i = 0; i < n; ++i) + a.b[i] = 0; +} Jakub