> @@ -633,12 +642,17 @@ c_strlen (tree src, int only_value) > return ssize_int (0); > > /* We don't know the starting offset, but we do know that the string > - has no internal zero bytes. We can assume that the offset falls > - within the bounds of the string; otherwise, the programmer deserves > - what he gets. Subtract the offset from the length of the string, > - and return that. This would perhaps not be valid if we were dealing > - with named arrays in addition to literal string constants. */ > - return size_diffop_loc (loc, size_int (maxelts * eltsize), byteoff); > + has no internal zero bytes. If the offset falls within the bounds > + of the string subtract the offset from the length of the string, > + and return that. Otherwise the length is zero. Take care to > + use SAVE_EXPR in case the OFFSET has side-effects. */ > + tree offsave = TREE_SIDE_EFFECTS (byteoff) ? save_expr (byteoff) : > byteoff; > + offsave = fold_convert (ssizetype, offsave); > + tree condexp = fold_build2_loc (loc, LE_EXPR, boolean_type_node, > offsave, > + build_int_cst (ssizetype, len * eltsize)); > + tree lenexp = size_diffop_loc (loc, ssize_int (strelts * eltsize), > offsave); > + return fold_build3_loc (loc, COND_EXPR, ssizetype, condexp, lenexp, > + build_zero_cst (ssizetype));
This computes the number of bytes. c_strlen is supposed to return number of (wide) characters: /* Compute the length of a null-terminated character string or wide character string handling character sizes of 1, 2, and 4 bytes. TREE_STRING_LENGTH is not the right way because it evaluates to the size of the character array in bytes (as opposed to characters) and because it can contain a zero byte in the middle. > @@ -11343,16 +11356,15 @@ string_constant (tree arg, tree *ptr_offset) > { > if (TREE_CODE (TREE_TYPE (array)) != ARRAY_TYPE) > return NULL_TREE; > - if (tree eltsize = TYPE_SIZE_UNIT (TREE_TYPE (TREE_TYPE (array)))) > - { > - /* Add the scaled variable index to the constant offset. */ > - tree eltoff = fold_build2 (MULT_EXPR, TREE_TYPE (offset), > - fold_convert (sizetype, varidx), > - eltsize); > - offset = fold_build2 (PLUS_EXPR, TREE_TYPE (offset), offset, eltoff); > - } > - else > - return NULL_TREE; > + > + while (TREE_CODE (chartype) != INTEGER_TYPE) > + chartype = TREE_TYPE (chartype); > + > + /* Set the non-constant offset to the non-constant index scaled > + by the size of the character type. */ > + offset = fold_build2 (MULT_EXPR, TREE_TYPE (offset), > + fold_convert (sizetype, varidx), > + TYPE_SIZE_UNIT (chartype)); here you fix the computation for wide character strings, but I see no test cases with wide character stings. But down here you use a non-wide character function on a wide character string: /* Avoid returning a string that doesn't fit in the array it is stored in, like const char a[4] = "abcde"; but do handle those that fit even if they have excess initializers, such as in const char a[4] = "abc\000\000"; The excess elements contribute to TREE_STRING_LENGTH() but not to strlen(). */ unsigned HOST_WIDE_INT length = strnlen (TREE_STRING_POINTER (init), TREE_STRING_LENGTH (init)); Actually I begin to wonder, if all this wide character stuff is really so common that we have to optimize it. Same for the strlen(&a[0][i]), does this happen really so often that it is a worth the risk? Bernd.