wide-int fails to build libitm because of a bad interaction between: /* Keep the OI and XI modes from confusing the compiler into thinking that these modes could actually be used for computation. They are only holders for vectors during data movement. */ #define MAX_BITSIZE_MODE_ANY_INT (128)
and the memcpy folding code: /* Make sure we are not copying using a floating-point mode or a type whose size possibly does not match its precision. */ if (FLOAT_MODE_P (TYPE_MODE (desttype)) || TREE_CODE (desttype) == BOOLEAN_TYPE || TREE_CODE (desttype) == ENUMERAL_TYPE) { /* A more suitable int_mode_for_mode would return a vector integer mode for a vector float mode or a integer complex mode for a float complex mode if there isn't a regular integer mode covering the mode of desttype. */ enum machine_mode mode = int_mode_for_mode (TYPE_MODE (desttype)); if (mode == BLKmode) desttype = NULL_TREE; else desttype = build_nonstandard_integer_type (GET_MODE_BITSIZE (mode), 1); } if (FLOAT_MODE_P (TYPE_MODE (srctype)) || TREE_CODE (srctype) == BOOLEAN_TYPE || TREE_CODE (srctype) == ENUMERAL_TYPE) { enum machine_mode mode = int_mode_for_mode (TYPE_MODE (srctype)); if (mode == BLKmode) srctype = NULL_TREE; else srctype = build_nonstandard_integer_type (GET_MODE_BITSIZE (mode), 1); } The failure occurs for complex long double, which we try to copy as a 256-bit integer type (OImode). This patch tries to do what the comment suggests by introducing a new form of int_mode_for_mode that replaces vector modes with vector modes and complex modes with complex modes. The fallback case of using a MODE_INT is limited by MAX_FIXED_MODE_SIZE, so can never go above 128 bits on x86_64. The question then is what to do about 128-bit types for i386. MAX_FIXED_MODE_SIZE is 64 there, which says that int128_t shouldn't be used for optimisation. However, gcc.target/i386/pr49168-1.c only passes for -m32 -msse2 because we use int128_t to copy a float128_t. I handled that by allowing MODE_VECTOR_INT to be used instead of MODE_INT if the mode size is greater than MAX_FIXED_MODE_SIZE, even if the original type wasn't a vector. It might be that other callers to int_mode_for_mode should use the new function too, but I'll look at that separately. I used the attached testcase (with printfs added to gcc) to check that the right modes and types were being chosen. The patch fixes the complex float and complex double cases, since the integer type that we previously picked had a larger alignment than the original complex type. One possibly subtle side-effect of FLOAT_MODE_P (TYPE_MODE (desttype)) is that vectors are copied as integer vectors if the target supports them directly but are copied as float vectors otherwise, since in the latter case the mode will be BLKmode. E.g. the 1024-bit vectors in the test are copied as vector floats and vector doubles both before and after the patch. Tested against trunk with x86_64-linux-gnu {,-m32}. OK to install? Thanks, Richard gcc/ * machmode.h (bitwise_mode_for_size): Declare. * stor-layout.h (bitwise_type_for_mode): Likewise. * stor-layout.c (bitwise_mode_for_size): New function. (bitwise_type_for_mode): Likewise. * builtins.c (fold_builtin_memory_op): Use it instead of int_mode_for_mode and build_nonstandard_integer_type. gcc/testsuite/ * gcc.dg/memcpy-5.c: New test. Index: gcc/machmode.h =================================================================== --- gcc/machmode.h 2014-04-18 11:16:12.706092658 +0100 +++ gcc/machmode.h 2014-04-18 11:16:38.179299261 +0100 @@ -253,6 +253,8 @@ extern enum machine_mode smallest_mode_f extern enum machine_mode int_mode_for_mode (enum machine_mode); +extern enum machine_mode bitwise_mode_for_size (enum machine_mode); + /* Return a mode that is suitable for representing a vector, or BLKmode on failure. */ Index: gcc/stor-layout.h =================================================================== --- gcc/stor-layout.h 2014-04-18 11:16:12.707092667 +0100 +++ gcc/stor-layout.h 2014-04-18 11:16:12.830093665 +0100 @@ -98,6 +98,8 @@ extern tree make_unsigned_type (int); mode_for_size, but is passed a tree. */ extern enum machine_mode mode_for_size_tree (const_tree, enum mode_class, int); +extern tree bitwise_type_for_mode (enum machine_mode); + /* Given a VAR_DECL, PARM_DECL or RESULT_DECL, clears the results of a previous call to layout_decl and calls it again. */ extern void relayout_decl (tree); Index: gcc/stor-layout.c =================================================================== --- gcc/stor-layout.c 2014-04-18 11:16:12.707092667 +0100 +++ gcc/stor-layout.c 2014-04-18 16:08:20.033752312 +0100 @@ -403,6 +403,73 @@ int_mode_for_mode (enum machine_mode mod return mode; } +/* Find a mode that can be used for efficient bitwise operations on MODE. + Return BLKmode if no such mode exists. */ + +enum machine_mode +bitwise_mode_for_mode (enum machine_mode mode) +{ + /* Quick exit if we already have a suitable mode. */ + unsigned int bitsize = GET_MODE_BITSIZE (mode); + if (SCALAR_INT_MODE_P (mode) && bitsize <= MAX_FIXED_MODE_SIZE) + return mode; + + /* Reuse the sanity checks from int_mode_for_mode. */ + gcc_checking_assert ((int_mode_for_mode (mode), true)); + + /* Try to replace complex modes with complex modes. In general we + expect both components to be processed independently, so we only + care whether there is a register for the inner mode. */ + if (COMPLEX_MODE_P (mode)) + { + enum machine_mode trial = mode; + if (GET_MODE_CLASS (mode) != MODE_COMPLEX_INT) + trial = mode_for_size (bitsize, MODE_COMPLEX_INT, false); + if (trial != BLKmode + && have_regs_of_mode[GET_MODE_INNER (trial)]) + return trial; + } + + /* Try to replace vector modes with vector modes. Also try using vector + modes if an integer mode would be too big. */ + if (VECTOR_MODE_P (mode) || bitsize > MAX_FIXED_MODE_SIZE) + { + enum machine_mode trial = mode; + if (GET_MODE_CLASS (mode) != MODE_VECTOR_INT) + trial = mode_for_size (bitsize, MODE_VECTOR_INT, 0); + if (trial != BLKmode + && have_regs_of_mode[trial] + && targetm.vector_mode_supported_p (trial)) + return trial; + } + + /* Otherwise fall back on integers while honoring MAX_FIXED_MODE_SIZE. */ + return mode_for_size (bitsize, MODE_INT, true); +} + +/* Find a type that can be used for efficient bitwise operations on MODE. + Return null if no such mode exists. */ + +tree +bitwise_type_for_mode (enum machine_mode mode) +{ + mode = bitwise_mode_for_mode (mode); + if (mode == BLKmode) + return NULL_TREE; + + unsigned int inner_size = GET_MODE_UNIT_BITSIZE (mode); + tree inner_type = build_nonstandard_integer_type (inner_size, true); + + if (VECTOR_MODE_P (mode)) + return build_vector_type_for_mode (inner_type, mode); + + if (COMPLEX_MODE_P (mode)) + return build_complex_type (inner_type); + + gcc_checking_assert (GET_MODE_INNER (mode) == VOIDmode); + return inner_type; +} + /* Find a mode that is suitable for representing a vector with NUNITS elements of mode INNERMODE. Returns BLKmode if there is no suitable mode. */ Index: gcc/builtins.c =================================================================== --- gcc/builtins.c 2014-04-18 11:16:12.676092416 +0100 +++ gcc/builtins.c 2014-04-18 11:22:50.107315234 +0100 @@ -8921,29 +8921,11 @@ fold_builtin_memory_op (location_t loc, if (FLOAT_MODE_P (TYPE_MODE (desttype)) || TREE_CODE (desttype) == BOOLEAN_TYPE || TREE_CODE (desttype) == ENUMERAL_TYPE) - { - /* A more suitable int_mode_for_mode would return a vector - integer mode for a vector float mode or a integer complex - mode for a float complex mode if there isn't a regular - integer mode covering the mode of desttype. */ - enum machine_mode mode = int_mode_for_mode (TYPE_MODE (desttype)); - if (mode == BLKmode) - desttype = NULL_TREE; - else - desttype = build_nonstandard_integer_type (GET_MODE_BITSIZE (mode), - 1); - } + desttype = bitwise_type_for_mode (TYPE_MODE (desttype)); if (FLOAT_MODE_P (TYPE_MODE (srctype)) || TREE_CODE (srctype) == BOOLEAN_TYPE || TREE_CODE (srctype) == ENUMERAL_TYPE) - { - enum machine_mode mode = int_mode_for_mode (TYPE_MODE (srctype)); - if (mode == BLKmode) - srctype = NULL_TREE; - else - srctype = build_nonstandard_integer_type (GET_MODE_BITSIZE (mode), - 1); - } + srctype = bitwise_type_for_mode (TYPE_MODE (srctype)); if (!srctype) srctype = desttype; if (!desttype) Index: gcc/testsuite/gcc.dg/memcpy-5.c =================================================================== --- /dev/null 2014-04-15 08:10:27.294524132 +0100 +++ gcc/testsuite/gcc.dg/memcpy-5.c 2014-04-18 11:21:52.036844396 +0100 @@ -0,0 +1,27 @@ +/* { dg-options "-O -fdump-tree-optimized" } */ + +extern void *memcpy (void *, const void *, __SIZE_TYPE__); + +#define TEST(NAME, TYPE) \ + TYPE NAME##x; \ + char NAME##y[sizeof (NAME##x)] __attribute__((aligned (__alignof__ (NAME##x)))); \ + void NAME (void) { memcpy (&NAME##x, &NAME##y, sizeof (NAME##x)); } + +TEST (f, float); +TEST (d, double); +TEST (ld, long double); +TEST (cf, _Complex float); +TEST (cd, _Complex double); +TEST (cld, _Complex long double); +TEST (d8f, float __attribute__((vector_size (8)))); +TEST (d16f, float __attribute__((vector_size (16)))); +TEST (d32f, float __attribute__((vector_size (32)))); +TEST (d64f, float __attribute__((vector_size (64)))); +TEST (d128f, float __attribute__((vector_size (128)))); +TEST (d16d, double __attribute__((vector_size (16)))); +TEST (d32d, double __attribute__((vector_size (32)))); +TEST (d64d, double __attribute__((vector_size (64)))); +TEST (d128d, double __attribute__((vector_size (128)))); + +/* { dg-final { scan-tree-dump-not "memcpy" "optimized" { target x86_64-*-* } } } */ +/* { dg-final { cleanup-tree-dump "optimized" } } */