hubert.reinterpretcast created this revision. hubert.reinterpretcast added reviewers: rsmith, aaron.ballman, hfinkel.
`memchr` and `memcmp` operate upon the character units of the object representation; that is, the `size_t` parameter expresses the number of character units. The constant folding implementation is updated in this patch to account for multibyte element types in the arrays passed to `memchr`/`memcmp` and, in the case of `memcmp`, to account for the possibility that the arrays may have differing element types (even when they are byte-sized). Actual inspection of the object representation is not implemented. Comparisons are done only between elements with the same object size; that is, `memchr` will fail when inspecting at least one character unit of a multibyte element. The integer types are assumed to have two's complement representation with 0 for `false`, 1 for `true`, and no padding bits. `memcmp` on multibyte elements will only be able to fold in cases where enough elements are equal for the answer to be 0. CodeGen tests are added for cases that miscompile on some system or other prior to this patch. Repository: rC Clang https://reviews.llvm.org/D55510 Files: include/clang/Basic/DiagnosticASTKinds.td lib/AST/ExprConstant.cpp test/CodeGenCXX/builtins.cpp test/SemaCXX/constexpr-string.cpp
Index: test/SemaCXX/constexpr-string.cpp =================================================================== --- test/SemaCXX/constexpr-string.cpp +++ test/SemaCXX/constexpr-string.cpp @@ -95,6 +95,51 @@ static_assert(__builtin_memcmp("abab\0banana", "abab\0canada", 6) == -1); static_assert(__builtin_memcmp("abab\0banana", "abab\0canada", 5) == 0); + extern struct Incomplete incomplete; + static_assert(__builtin_memcmp(&incomplete, "", 0u) == 0); + static_assert(__builtin_memcmp("", &incomplete, 0u) == 0); + static_assert(__builtin_memcmp(&incomplete, "", 1u) == 42); // expected-error {{not an integral constant}} expected-note {{read of incomplete type 'struct Incomplete'}} + static_assert(__builtin_memcmp("", &incomplete, 1u) == 42); // expected-error {{not an integral constant}} expected-note {{read of incomplete type 'struct Incomplete'}} + + constexpr unsigned char ku00fe00[] = {0x00, 0xfe, 0x00}; + constexpr unsigned char ku00feff[] = {0x00, 0xfe, 0xff}; + constexpr signed char ks00fe00[] = {0, -2, 0}; + constexpr signed char ks00feff[] = {0, -2, -1}; + static_assert(__builtin_memcmp(ku00feff, ks00fe00, 2) == 0); + static_assert(__builtin_memcmp(ku00feff, ks00fe00, 99) == 1); + static_assert(__builtin_memcmp(ku00fe00, ks00feff, 99) == -1); + static_assert(__builtin_memcmp(ks00feff, ku00fe00, 2) == 0); + static_assert(__builtin_memcmp(ks00feff, ku00fe00, 99) == 1); + static_assert(__builtin_memcmp(ks00fe00, ku00feff, 99) == -1); + static_assert(__builtin_memcmp(ks00fe00, ks00feff, 2) == 0); + static_assert(__builtin_memcmp(ks00feff, ks00fe00, 99) == 1); + static_assert(__builtin_memcmp(ks00fe00, ks00feff, 99) == -1); + + constexpr bool kb000100[] = {false, true, false}; + static_assert(sizeof(bool) != 1u || __builtin_memcmp(ks00fe00, kb000100, 1) == 0); + static_assert(sizeof(bool) != 1u || __builtin_memcmp(ks00fe00, kb000100, 2) == 1); + + constexpr long ksl[] = {0, -1}; + constexpr unsigned int kui[] = {0, 0u - 1}; + constexpr unsigned long long kull[] = {0, 0ull - 1}; + constexpr const auto *kuSizeofLong(void) { + if constexpr(sizeof(long) == sizeof(int)) { + return kui; + } else { + static_assert(sizeof(long) == sizeof(long long)); + return kull; + } + } + static_assert(__builtin_memcmp(ksl, kuSizeofLong(), sizeof(long) - 1) == 0); + static_assert(__builtin_memcmp(ksl, kuSizeofLong(), sizeof(long) + 0) == 0); + static_assert(__builtin_memcmp(ksl, kuSizeofLong(), sizeof(long) + 1) == 0); + static_assert(__builtin_memcmp(ksl, kuSizeofLong(), 2*sizeof(long) - 1) == 0); + static_assert(__builtin_memcmp(ksl, kuSizeofLong(), 2*sizeof(long) + 0) == 0); + static_assert(__builtin_memcmp(ksl, kuSizeofLong(), 2*sizeof(long) + 1) == 42); // expected-error {{not an integral constant}} expected-note {{dereferenced one-past-the-end}} + static_assert(__builtin_memcmp(ksl + 1, kuSizeofLong() + 1, sizeof(long) - 1) == 0); + static_assert(__builtin_memcmp(ksl + 1, kuSizeofLong() + 1, sizeof(long) + 0) == 0); + static_assert(__builtin_memcmp(ksl + 1, kuSizeofLong() + 1, sizeof(long) + 1) == 42); // expected-error {{not an integral constant}} expected-note {{dereferenced one-past-the-end}} + constexpr int a = strcmp("hello", "world"); // expected-error {{constant expression}} expected-note {{non-constexpr function 'strcmp' cannot be used in a constant expression}} constexpr int b = strncmp("hello", "world", 3); // expected-error {{constant expression}} expected-note {{non-constexpr function 'strncmp' cannot be used in a constant expression}} constexpr int c = memcmp("hello", "world", 3); // expected-error {{constant expression}} expected-note {{non-constexpr function 'memcmp' cannot be used in a constant expression}} @@ -187,6 +232,26 @@ static_assert(__builtin_memchr(nullptr, 'x', 3) == nullptr); // expected-error {{not an integral constant}} expected-note {{dereferenced null}} static_assert(__builtin_memchr(nullptr, 'x', 0) == nullptr); // FIXME: Should we reject this? + extern struct Incomplete incomplete; + static_assert(__builtin_memchr(&incomplete, 0, 0u) == nullptr); + static_assert(__builtin_memchr(&incomplete, 0, 1u) == nullptr); // expected-error {{not an integral constant}} expected-note {{read of incomplete type 'struct Incomplete'}} + + const unsigned char &u1 = 0xf0; + auto &&i1 = (const signed char []){-128}; // expected-warning {{compound literals are a C99-specific feature}} + static_assert(__builtin_memchr(&u1, -(0x0f + 1), 1) == &u1); + static_assert(__builtin_memchr(i1, 0x80, 1) == i1); + + enum class E : unsigned char {}; + constexpr E e{240}; + static_assert(__builtin_memchr(&e, 240, 1) == &e); + + constexpr bool kBool[] = {false, true, false}; + constexpr const bool *const kBoolPastTheEndPtr = kBool + 3; + static_assert(sizeof(bool) != 1u || __builtin_memchr(kBoolPastTheEndPtr - 3, 1, 99) == kBool + 1); + static_assert(sizeof(bool) != 1u || __builtin_memchr(kBool + 1, 0, 99) == kBoolPastTheEndPtr - 1); + static_assert(sizeof(bool) != 1u || __builtin_memchr(kBoolPastTheEndPtr - 3, -1, 3) == nullptr); + static_assert(sizeof(bool) != 1u || __builtin_memchr(kBoolPastTheEndPtr, 0, 1) == nullptr); // expected-error {{not an integral constant}} expected-note {{dereferenced one-past-the-end}} + static_assert(__builtin_char_memchr(kStr, 'a', 0) == nullptr); static_assert(__builtin_char_memchr(kStr, 'a', 1) == kStr); static_assert(__builtin_char_memchr(kStr, '\0', 5) == nullptr); Index: test/CodeGenCXX/builtins.cpp =================================================================== --- test/CodeGenCXX/builtins.cpp +++ test/CodeGenCXX/builtins.cpp @@ -1,4 +1,5 @@ // RUN: %clang_cc1 -triple=x86_64-linux-gnu -emit-llvm -o - %s | FileCheck %s +// RUN: %clang_cc1 -triple=s390x-suse-linux -emit-llvm -o - %s | FileCheck %s // PR8839 extern "C" char memmove(); @@ -27,9 +28,89 @@ // CHECK: [[Y:%.+]] = call i64 @_Z13__builtin_absl(i64 -2) // CHECK: store i64 [[Y]], i64* @y, align 8 +namespace MemchrMultibyteElementTests { +#define STR2(X) #X +#define STR(X) STR2(X) +constexpr const char ByteOrderString[] = STR(__BYTE_ORDER__); +// CHECK: define void @_ZN27MemchrMultibyteElementTests12memchr_testsEv() +void memchr_tests(void) { + extern void matchedFirstByteIn04030201(); + constexpr unsigned int u04030201 = 0x04030201; + if (__builtin_memchr(&u04030201, *ByteOrderString - '0', 1u) == &u04030201) { + matchedFirstByteIn04030201(); + // CHECK: call void @_ZN27MemchrMultibyteElementTests26matchedFirstByteIn04030201Ev() + } + extern void checkedFirstByteIn000000ED(); + constexpr unsigned int uED = 0xEDU; + if (__builtin_memchr(&uED, 0xED, 1u) == + (*ByteOrderString == '1' ? &uED : nullptr)) { + checkedFirstByteIn000000ED(); + // CHECK: call void @_ZN27MemchrMultibyteElementTests26checkedFirstByteIn000000EDEv() + } +} +#undef STR +#undef STR2 +} + extern const char char_memchr_arg[32]; char *memchr_result = __builtin_char_memchr(char_memchr_arg, 123, 32); -// CHECK: call i8* @memchr(i8* getelementptr inbounds ([32 x i8], [32 x i8]* @char_memchr_arg, i32 0, i32 0), i32 123, i64 32) +// CHECK: call i8* @memchr(i8* getelementptr inbounds ([32 x i8], [32 x i8]* @char_memchr_arg, i32 0, i32 0), i32 {{(signext )?}}123, i64 32) + +namespace MemcmpMultibyteElementTests { +#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ +#define CHK_BY_ENDIAN(X, OP_LE, OP_OTHER) ((X) OP_LE 0) +#else +#define CHK_BY_ENDIAN(X, OP_LE, OP_OTHER) ((X) OP_OTHER 0) +#endif +#define LT < +#define EQ == +#define GT > +using MemchrMultibyteElementTests::ByteOrderString; +// CHECK: define void @_ZN27MemcmpMultibyteElementTests12memcmp_testsEv() +void memcmp_tests(void) { +#ifdef __SIZEOF_INT128__ + extern void checkedFirstByteInAllZeroLowerHalfInt128Pair(); + constexpr __int128 i128_ff_8_00_8 = -(__int128)1 - -1ull; + constexpr __int128 i128_00_16 = 0; + if (CHK_BY_ENDIAN(__builtin_memcmp(&i128_ff_8_00_8, &i128_00_16, 1u), + EQ, GT)) { + checkedFirstByteInAllZeroLowerHalfInt128Pair(); + // CHECK: call void @_ZN27MemcmpMultibyteElementTests44checkedFirstByteInAllZeroLowerHalfInt128PairEv() + } +#endif + extern void mixedSizeComparisonSucceeded(); + constexpr const signed char ByteOrderStringReduced[] = { + ByteOrderString[0] - '0', ByteOrderString[1] - '0', + ByteOrderString[2] - '0', ByteOrderString[3] - '0', + }; + constexpr signed int i04030201 = 0x04030201; + constexpr unsigned int u04030201 = 0x04030201u; + if (__builtin_memcmp(ByteOrderStringReduced, &i04030201, sizeof(int)) == 0 && + __builtin_memcmp(&u04030201, ByteOrderStringReduced, sizeof(int)) == 0) { + mixedSizeComparisonSucceeded(); + // CHECK: call void @_ZN27MemcmpMultibyteElementTests28mixedSizeComparisonSucceededEv() + } + extern void checkedFirstByteInMixedSizeButEqualValueUIntUShortPair(); + constexpr unsigned int ui0000FEFF = 0x0000feffU; + constexpr unsigned short usFEFF = 0xfeffU; + if (CHK_BY_ENDIAN(__builtin_memcmp(&ui0000FEFF, &usFEFF, 1u), EQ, LT)) { + checkedFirstByteInMixedSizeButEqualValueUIntUShortPair(); + // CHECK: call void @_ZN27MemcmpMultibyteElementTests54checkedFirstByteInMixedSizeButEqualValueUIntUShortPairEv() + } + extern void checkedForFirstDifferentByteInUIntPair(); + constexpr unsigned int ui08038700 = 0x08038700u; + constexpr unsigned int ui08048600 = 0x08048600u; + if (CHK_BY_ENDIAN(__builtin_memcmp(&ui08038700, &ui08048600, sizeof(int)), + GT, LT)) { + checkedForFirstDifferentByteInUIntPair(); + // CHECK: call void @_ZN27MemcmpMultibyteElementTests38checkedForFirstDifferentByteInUIntPairEv() + } +} +#undef GT +#undef EQ +#undef LT +#undef CHK_BY_ENDIAN +} int constexpr_overflow_result() { constexpr int x = 1; Index: lib/AST/ExprConstant.cpp =================================================================== --- lib/AST/ExprConstant.cpp +++ lib/AST/ExprConstant.cpp @@ -1290,6 +1290,14 @@ } } +/// Kinds of access we can perform on an object, for diagnostics. +enum AccessKinds { + AK_Read, + AK_Assign, + AK_Increment, + AK_Decrement +}; + namespace { struct ComplexValue { private: @@ -1395,21 +1403,36 @@ set(B, true); } + private: // Check that this LValue is not based on a null pointer. If it is, produce // a diagnostic and mark the designator as invalid. - bool checkNullPointer(EvalInfo &Info, const Expr *E, - CheckSubobjectKind CSK) { + template <typename GenDiagType> + bool checkNullPointerDiagnosingWith(const GenDiagType &GenDiag) { if (Designator.Invalid) return false; if (IsNullPtr) { - Info.CCEDiag(E, diag::note_constexpr_null_subobject) - << CSK; + GenDiag(); Designator.setInvalid(); return false; } return true; } + public: + bool checkNullPointer(EvalInfo &Info, const Expr *E, + CheckSubobjectKind CSK) { + return checkNullPointerDiagnosingWith([&Info, E, CSK] { + Info.CCEDiag(E, diag::note_constexpr_null_subobject) << CSK; + }); + } + + bool checkNullPointerForFoldAccess(EvalInfo &Info, const Expr *E, + AccessKinds AK) { + return checkNullPointerDiagnosingWith([&Info, E, AK] { + Info.FFDiag(E, diag::note_constexpr_access_null) << AK; + }); + } + // Check this LValue refers to an object. If not, set the designator to be // invalid and emit a diagnostic. bool checkSubobject(EvalInfo &Info, const Expr *E, CheckSubobjectKind CSK) { @@ -2746,14 +2769,6 @@ return false; } -/// Kinds of access we can perform on an object, for diagnostics. -enum AccessKinds { - AK_Read, - AK_Assign, - AK_Increment, - AK_Decrement -}; - namespace { /// A handle to a complete object (an object that is not a subobject of /// another object). @@ -6126,9 +6141,23 @@ return false; MaxLength = N.getExtValue(); } - - QualType CharTy = E->getArg(0)->getType()->getPointeeType(); - + // We cannot find the value if there are no candidates to match against. + if (MaxLength == 0u) + return ZeroInitialization(E); + if (!Result.checkNullPointerForFoldAccess(Info, E, AK_Read)) + return false; + QualType CharTy = + Info.Ctx.getBaseElementType(getType(Result.getLValueBase())); + const bool IsRawByte = BuiltinOp == Builtin::BImemchr || + BuiltinOp == Builtin::BI__builtin_memchr; + // Pointers to const void may point to objects of incomplete type. + if (IsRawByte && CharTy->isIncompleteType()) { + Info.FFDiag(E, diag::note_constexpr_ltor_incomplete_type) << CharTy; + return false; + } + // Give up on byte-oriented matching against multibyte elements. + if (IsRawByte && Info.Ctx.getTypeSize(CharTy) > Info.Ctx.getCharWidth()) + return false; // Figure out what value we're actually looking for (after converting to // the corresponding unsigned type if necessary). uint64_t DesiredVal; @@ -8382,8 +8411,6 @@ !EvaluatePointer(E->getArg(1), String2, Info)) return false; - QualType CharTy = E->getArg(0)->getType()->getPointeeType(); - uint64_t MaxLength = uint64_t(-1); if (BuiltinOp != Builtin::BIstrcmp && BuiltinOp != Builtin::BIwcscmp && @@ -8394,6 +8421,78 @@ return false; MaxLength = N.getExtValue(); } + + // Empty substrings compare equal by definition. + if (MaxLength == 0u) + return Success(0, E); + + if (!String1.checkNullPointerForFoldAccess(Info, E, AK_Read) || + !String2.checkNullPointerForFoldAccess(Info, E, AK_Read)) + return false; + + QualType CharTy1 = + Info.Ctx.getBaseElementType(getType(String1.getLValueBase())); + QualType CharTy2 = + Info.Ctx.getBaseElementType(getType(String2.getLValueBase())); + + const auto &ReadCurElems = [&](APValue &Char1, APValue &Char2) { + return handleLValueToRValueConversion(Info, E, CharTy1, String1, Char1) && + handleLValueToRValueConversion(Info, E, CharTy2, String2, Char2) && + Char1.isInt() && Char2.isInt(); + }; + const auto &AdvanceElems = [&] { + return HandleLValueArrayAdjustment(Info, E, String1, CharTy1, 1) && + HandleLValueArrayAdjustment(Info, E, String2, CharTy2, 1); + }; + + const bool IsRawByte = BuiltinOp == Builtin::BImemcmp || + BuiltinOp == Builtin::BI__builtin_memcmp; + + if (IsRawByte) { + // Pointers to const void may point to objects of incomplete type. + if (CharTy1->isIncompleteType()) { + Info.FFDiag(E, diag::note_constexpr_ltor_incomplete_type) << CharTy1; + return false; + } + if (CharTy2->isIncompleteType()) { + Info.FFDiag(E, diag::note_constexpr_ltor_incomplete_type) << CharTy2; + return false; + } + const auto CharTy1Width = Info.Ctx.getTypeSize(CharTy1); + // Give up on comparing between elements with disparate widths. + if (CharTy1Width != Info.Ctx.getTypeSize(CharTy2)) + return false; + const auto LengthPerElement = CharTy1Width / Info.Ctx.getCharWidth(); + assert(MaxLength); + for (;;) { + APValue Char1, Char2; + if (!ReadCurElems(Char1, Char2)) + return false; + // We have compatible in-memory widths, but a possible type and internal + // representation mismatch. Assuming two's complement representation, + // including 0 for `false` and 1 for `true`, we can check an appropriate + // number of elements for equality even if they are not byte-sized. + const APSInt Char1InMem = Char1.getInt().extOrTrunc(CharTy1Width); + const APSInt Char2InMem = Char2.getInt().extOrTrunc(CharTy1Width); + if (Char1InMem.ne(Char2InMem)) { + // If the elements are byte-sized, then we can produce a three-way + // comparison result in a straightforward manner. + if (LengthPerElement == 1u) { + // memcmp always compares unsigned chars. + return Success(Char1InMem.ult(Char2InMem) ? -1 : 1, E); + } + return false; + } + if (!AdvanceElems()) + return false; + if (MaxLength <= LengthPerElement) + break; + MaxLength -= LengthPerElement; + } + // Enough elements are equal to account for the memcmp limit. + return Success(0, E); + } + bool StopAtNull = (BuiltinOp != Builtin::BImemcmp && BuiltinOp != Builtin::BIwmemcmp && BuiltinOp != Builtin::BI__builtin_memcmp && @@ -8404,11 +8503,10 @@ BuiltinOp == Builtin::BI__builtin_wcscmp || BuiltinOp == Builtin::BI__builtin_wcsncmp || BuiltinOp == Builtin::BI__builtin_wmemcmp; + for (; MaxLength; --MaxLength) { APValue Char1, Char2; - if (!handleLValueToRValueConversion(Info, E, CharTy, String1, Char1) || - !handleLValueToRValueConversion(Info, E, CharTy, String2, Char2) || - !Char1.isInt() || !Char2.isInt()) + if (!ReadCurElems(Char1, Char2)) return false; if (Char1.getInt() != Char2.getInt()) { if (IsWide) // wmemcmp compares with wchar_t signedness. @@ -8419,8 +8517,7 @@ if (StopAtNull && !Char1.getInt()) return Success(0, E); assert(!(StopAtNull && !Char2.getInt())); - if (!HandleLValueArrayAdjustment(Info, E, String1, CharTy, 1) || - !HandleLValueArrayAdjustment(Info, E, String2, CharTy, 1)) + if (!AdvanceElems()) return false; } // We hit the strncmp / memcmp limit. Index: include/clang/Basic/DiagnosticASTKinds.td =================================================================== --- include/clang/Basic/DiagnosticASTKinds.td +++ include/clang/Basic/DiagnosticASTKinds.td @@ -121,6 +121,8 @@ "read of non-const variable %0 is not allowed in a constant expression">; def note_constexpr_ltor_non_constexpr : Note< "read of non-constexpr variable %0 is not allowed in a constant expression">; +def note_constexpr_ltor_incomplete_type : Note< + "read of incomplete type %0 is not allowed in a constant expression">; def note_constexpr_access_null : Note< "%select{read of|assignment to|increment of|decrement of}0 " "dereferenced null pointer is not allowed in a constant expression">;
_______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits