The following patch cures the missed optimization in PR82084, vectorizing a wide-char initializer
wchar_t strs[4][2]= { L"A", L"B", L"C" , L"D"}; with AVX to MEM[(wchar_t[2] *)&strs] = { 65, 66, 67, 68 }; It's not entirely clear to me whether the individual STRING_CSTs we build for the gimplified code strs[0] = "A"; strs[1] = "B"; strs[2] = "C"; strs[3] = "D"; always have a consistent "character" size and how the individual "characters" are encoded. The patch assumes that the array element type of the STRING_CST can be used to get access to individual characters by means of the element type size and those elements are stored in host byte order. Which means the patch simply handles 16bit and 32bit "characters" as 16bit and 32bit integers and encodes them with the same rules as such integers. Joseph, are there more considerations for encoding the target representation of STRING_CSTs? Looks like I was too lazy to look up answers to those questions from the RTL expansion code which hopefully outputs constant initializers properly. Apart from vectorization in the mentioned testcase we also gain constant folding from pieces from this change (but I don't adjust fold_read_from_constant_string yet). Thanks, Richard. 2017-09-04 Richard Biener <rguent...@suse.de> PR tree-optimization/82084 * fold-const.c (native_encode_string): Handle wide characters. (can_native_encode_string_p): Likewise. Index: gcc/fold-const.c =================================================================== --- gcc/fold-const.c (revision 251661) +++ gcc/fold-const.c (working copy) @@ -7187,26 +7187,71 @@ native_encode_string (const_tree expr, u if (! 
can_native_encode_string_p (expr)) return 0; - HOST_WIDE_INT total_bytes = tree_to_shwi (TYPE_SIZE_UNIT (TREE_TYPE (expr))); + tree type = TREE_TYPE (expr); + HOST_WIDE_INT total_bytes = tree_to_shwi (TYPE_SIZE_UNIT (type)); + int orig_off = off; if ((off == -1 && total_bytes > len) || off >= total_bytes) return 0; if (off == -1) off = 0; - if (TREE_STRING_LENGTH (expr) - off < MIN (total_bytes, len)) + + HOST_WIDE_INT elsz = tree_to_shwi (TYPE_SIZE_UNIT (TREE_TYPE (type))); + if (elsz == 1) { - int written = 0; - if (off < TREE_STRING_LENGTH (expr)) + if (TREE_STRING_LENGTH (expr) - off < MIN (total_bytes, len)) { - written = MIN (len, TREE_STRING_LENGTH (expr) - off); - memcpy (ptr, TREE_STRING_POINTER (expr) + off, written); + int written = 0; + if (off < TREE_STRING_LENGTH (expr)) + { + written = MIN (len, TREE_STRING_LENGTH (expr) - off); + memcpy (ptr, TREE_STRING_POINTER (expr) + off, written); + } + memset (ptr + written, 0, + MIN (total_bytes - written, len - written)); } - memset (ptr + written, 0, - MIN (total_bytes - written, len - written)); + else + memcpy (ptr, TREE_STRING_POINTER (expr) + off, MIN (total_bytes, len)); + return MIN (total_bytes - off, len); } else - memcpy (ptr, TREE_STRING_POINTER (expr) + off, MIN (total_bytes, len)); - return MIN (total_bytes - off, len); + { + tree ielt = build_nonstandard_integer_type (elsz * 8, true); + int offset = 0; + bool first = true; + for (int o = off & ~(elsz - 1); o < total_bytes; o += elsz) + { + unsigned HOST_WIDE_INT c; + switch (elsz) + { + case 2: + { + uint16_t s; + memcpy (&s, TREE_STRING_POINTER (expr) + o, 2); + c = s; + break; + } + case 4: + { + uint32_t i; + memcpy (&i, TREE_STRING_POINTER (expr) + o, 4); + c = i; + break; + } + default: + gcc_unreachable (); + } + tree elem = build_int_cstu (ielt, c); + int res = native_encode_expr (elem, ptr+offset, len-offset, + first ? 
off & (elsz - 1) : 0); + if ((orig_off == -1 && res != elsz) + || res == 0) + return 0; + offset += res; + first = false; + } + return offset; + } } @@ -7491,10 +7536,11 @@ can_native_encode_string_p (const_tree e if (TREE_CODE (type) != ARRAY_TYPE || TREE_CODE (TREE_TYPE (type)) != INTEGER_TYPE - || (GET_MODE_BITSIZE (SCALAR_INT_TYPE_MODE (TREE_TYPE (type))) - != BITS_PER_UNIT) || !tree_fits_shwi_p (TYPE_SIZE_UNIT (type))) return false; + HOST_WIDE_INT elsz = tree_to_shwi (TYPE_SIZE_UNIT (TREE_TYPE (type))); + if (elsz != 1 && elsz != 2 && elsz != 4) + return false; return true; }