https://github.com/ojhunt updated https://github.com/llvm/llvm-project/pull/173140
>From 2a7c39edfda0e7ccaf5aa2b9fa9c07aebb6b8f62 Mon Sep 17 00:00:00 2001 From: Oliver Hunt <[email protected]> Date: Fri, 19 Dec 2025 20:33:17 -0800 Subject: [PATCH] [clang] Add support for consteval null terminated strings Adds support for null terminated strings produced by constexpr evaluation. This makes it possible to perform analysis of format strings that previously were not possible, and is needed in the future to support __ptrauth qualifier options. --- .../clang/Basic/DiagnosticSemaKinds.td | 13 +++-- clang/include/clang/Sema/Sema.h | 9 ++-- clang/lib/AST/ByteCode/Context.cpp | 8 +-- clang/lib/Sema/SemaDeclCXX.cpp | 42 +++++++++++++++ clang/test/Parser/asm.cpp | 29 ++++++++++- clang/test/SemaCXX/gnu-asm-constexpr.cpp | 4 +- clang/test/SemaCXX/static-assert-cxx26.cpp | 51 ++++++++++++++++++- 7 files changed, 141 insertions(+), 15 deletions(-) diff --git a/clang/include/clang/Basic/DiagnosticSemaKinds.td b/clang/include/clang/Basic/DiagnosticSemaKinds.td index 51b6eba965103..32908ece9cc21 100644 --- a/clang/include/clang/Basic/DiagnosticSemaKinds.td +++ b/clang/include/clang/Basic/DiagnosticSemaKinds.td @@ -1746,15 +1746,22 @@ def subst_user_defined_msg : TextSubstitution< "%select{the message|the expression}0 in " "%select{a static assertion|this asm operand}0">; -def err_user_defined_msg_invalid : Error< - "%sub{subst_user_defined_msg}0 must be a string literal or an " - "object with 'data()' and 'size()' member functions">; +def err_user_defined_msg_invalid + : Error<"%sub{subst_user_defined_msg}0 must be a null terminated constant " + "string or an " + "object with 'data()' and 'size()' member functions">; def err_user_defined_msg_missing_member_function : Error< "the %select{message|string}0 object in " "%select{this static assertion|this asm operand}0 is missing %select{" "a 'size()' member function|" "a 'data()' member function|" "'data()' and 'size()' member functions}1">; +def err_user_defined_msg_not_null_terminated_string + : Error<"%sub{subst_user_defined_msg}0 is not null terminated">; +def ext_consteval_string_constants + : Extension<"consteval string constants are an extension">, + DefaultWarn, + InGroup<DiagGroup<"consteval-string-constants-extension">>; def err_user_defined_msg_invalid_mem_fn_ret_ty : Error< "%sub{subst_user_defined_msg}0 must have a '%select{size|data}1()' member " "function returning an object convertible to '%select{std::size_t|const char *}1'">; diff --git a/clang/include/clang/Sema/Sema.h b/clang/include/clang/Sema/Sema.h index 91b07aa500b86..9d00aa5edebc9 100644 --- a/clang/include/clang/Sema/Sema.h +++ b/clang/include/clang/Sema/Sema.h @@ -831,10 +831,11 @@ enum class CCEKind { ArrayBound, ///< Array bound in array declarator or new-expression. ExplicitBool, ///< Condition in an explicit(bool) specifier. Noexcept, ///< Condition in a noexcept(bool) specifier. - StaticAssertMessageSize, ///< Call to size() in a static assert - ///< message. - StaticAssertMessageData, ///< Call to data() in a static assert - ///< message. + StaticAssertMessageSize, ///< Call to size() in a static assert + ///< message. + StaticAssertMessageData, ///< Call to data() in a static assert + ///< message. + StaticAssertNullTerminatedString, ///< tryEvaluateStrLen }; /// Enums for the diagnostics of target, target_version and target_clones. diff --git a/clang/lib/AST/ByteCode/Context.cpp b/clang/lib/AST/ByteCode/Context.cpp index 74ec986e49ca7..208fcb2a2732e 100644 --- a/clang/lib/AST/ByteCode/Context.cpp +++ b/clang/lib/AST/ByteCode/Context.cpp @@ -294,13 +294,15 @@ bool Context::evaluateStrlen(State &Parent, const Expr *E, uint64_t &Result) { if (!FieldDesc->isPrimitiveArray()) return false; - if (Ptr.isDummy() || Ptr.isUnknownSizeArray()) + if (Ptr.isDummy() || Ptr.isUnknownSizeArray() || Ptr.isPastEnd()) return false; unsigned N = Ptr.getNumElems(); if (Ptr.elemSize() == 1) { - Result = strnlen(reinterpret_cast<const char *>(Ptr.getRawAddress()), N); - return Result != N; + unsigned Size = N - Ptr.getIndex(); + Result = + strnlen(reinterpret_cast<const char *>(Ptr.getRawAddress()), Size); + return Result != Size; } PrimType ElemT = FieldDesc->getPrimType(); diff --git a/clang/lib/Sema/SemaDeclCXX.cpp b/clang/lib/Sema/SemaDeclCXX.cpp index 4da431f19acec..45cdf9563235c 100644 --- a/clang/lib/Sema/SemaDeclCXX.cpp +++ b/clang/lib/Sema/SemaDeclCXX.cpp @@ -17693,6 +17693,44 @@ void Sema::DiagnoseStaticAssertDetails(const Expr *E) { } } +template <typename ResultType> +static bool EvaluateAsNullTerminatedCharBuffer( + Sema &SemaRef, Expr *Message, ResultType &Result, ASTContext &Ctx, + Sema::StringEvaluationContext EvalContext, bool ErrorOnInvalidMessage) { + SourceLocation Loc = Message->getBeginLoc(); + QualType SizeT = Ctx.getSizeType(); + QualType ConstCharPtr = Ctx.getPointerType(Ctx.getConstType(Ctx.CharTy)); + Expr::EvalResult Status; + SmallVector<PartialDiagnosticAt, 8> Notes; + Status.Diag = &Notes; + + auto DiagnoseInvalidConstantString = [&]() { + SemaRef.Diag(Loc, diag::err_user_defined_msg_not_null_terminated_string) + << EvalContext; + for (const auto &Note : Notes) + SemaRef.Diag(Note.first, Note.second); + return !ErrorOnInvalidMessage; + }; + ExprResult EvaluatedData = SemaRef.BuildConvertedConstantExpression( + Message, ConstCharPtr, CCEKind::StaticAssertNullTerminatedString); + if (EvaluatedData.isInvalid()) + return DiagnoseInvalidConstantString(); + + uint64_t Length = 0; + if (!EvaluatedData.get()->tryEvaluateStrLen(Length, Ctx)) + return DiagnoseInvalidConstantString(); + + llvm::APInt SizeVal(Ctx.getIntWidth(SizeT), Length); + Expr *SizeExpr = IntegerLiteral::Create(Ctx, SizeVal, SizeT, Loc); + + bool EvalResult = Message->EvaluateCharRangeAsString( + Result, SizeExpr, EvaluatedData.get(), Ctx, Status); + if (!EvalResult || !Notes.empty()) + return DiagnoseInvalidConstantString(); + SemaRef.Diag(Loc, diag::ext_consteval_string_constants); + return true; +} + template <typename ResultType> static bool EvaluateAsStringImpl(Sema &SemaRef, Expr *Message, ResultType &Result, ASTContext &Ctx, @@ -17726,6 +17764,10 @@ static bool EvaluateAsStringImpl(Sema &SemaRef, Expr *Message, SourceLocation Loc = Message->getBeginLoc(); QualType T = Message->getType().getNonReferenceType(); + if (T->isPointerType() && T->getPointeeType()->isCharType()) + return EvaluateAsNullTerminatedCharBuffer( + SemaRef, Message, Result, Ctx, EvalContext, ErrorOnInvalidMessage); + auto *RD = T->getAsCXXRecordDecl(); if (!RD) { SemaRef.Diag(Loc, diag::err_user_defined_msg_invalid) << EvalContext; diff --git a/clang/test/Parser/asm.cpp b/clang/test/Parser/asm.cpp index cf23b6f42a077..517af5e777ce6 100644 --- a/clang/test/Parser/asm.cpp +++ b/clang/test/Parser/asm.cpp @@ -35,6 +35,9 @@ struct string_view { int foo1 asm ((string_view("test"))); // expected-error {{expected string literal in 'asm'}} int func() asm ((string_view("test"))); // expected-error {{expected string literal in 'asm'}} +constexpr const char* getConstantString(const char* s) { + return s; +} void f2() { asm(string_view("")); // expected-error {{expected string literal or parenthesized constant expression in 'asm'}} @@ -44,6 +47,13 @@ void f2() { asm("" :: string_view("")); // expected-error {{expected string literal or parenthesized constant expression in 'asm'}} asm(::string_view("")); // expected-error {{expected string literal or parenthesized constant expression in 'asm'}} + asm(getConstantString("")); // expected-error {{expected string literal or parenthesized constant expression in 'asm'}} + asm("" : getConstantString("")); // expected-error {{expected string literal or parenthesized constant expression in 'asm'}} + asm("" : : getConstantString("")); // expected-error {{expected string literal or parenthesized constant expression in 'asm'}} + asm("" : : : getConstantString("")); // expected-error {{expected ')'}} + asm("" :: getConstantString("")); // expected-error {{expected string literal or parenthesized constant expression in 'asm'}} + asm(::getConstantString("")); // expected-error {{expected string literal or parenthesized constant expression in 'asm'}} + int i; asm((string_view(""))); @@ -55,5 +65,22 @@ void f2() { asm("" : (::string_view("+g")) (i) : (::string_view("g")) (0) : (string_view("memory"))); - asm((0)); // expected-error {{the expression in this asm operand must be a string literal or an object with 'data()' and 'size()' member functions}} + asm((getConstantString(""))); + // expected-warning@-1 {{consteval string constants are an extension}} + asm((::getConstantString(""))); + // expected-warning@-1 {{consteval string constants are an extension}} + asm("" : (::getConstantString("+g")) (i)); + // expected-warning@-1 {{consteval string constants are an extension}} + asm("" : (::getConstantString("+g"))); // expected-error {{expected '(' after 'asm operand'}} + // expected-warning@-1 {{consteval string constants are an extension}} + asm("" : (::getConstantString("+g")) (i) : (::getConstantString("g")) (0)); + // expected-warning@-1 2 {{consteval string constants are an extension}} + asm("" : (::getConstantString("+g")) (i) : (::getConstantString("g"))); // expected-error {{expected '(' after 'asm operand'}} + // expected-warning@-1 2 {{consteval string constants are an extension}} + asm("" : (::getConstantString("+g")) (i) : (::getConstantString("g")) (0) : (getConstantString("memory"))); + // expected-warning@-1 3 {{consteval string constants are an extension}} + + + + asm((0)); // expected-error {{the expression in this asm operand must be a null terminated constant string or an object with 'data()' and 'size()' member functions}} } diff --git a/clang/test/SemaCXX/gnu-asm-constexpr.cpp b/clang/test/SemaCXX/gnu-asm-constexpr.cpp index 77466df12bdc1..f1d2862a62918 100644 --- a/clang/test/SemaCXX/gnu-asm-constexpr.cpp +++ b/clang/test/SemaCXX/gnu-asm-constexpr.cpp @@ -77,7 +77,7 @@ struct string_view { void f() { - asm(("")); // expected-error {{the expression in this asm operand must be a string literal or an object with 'data()' and 'size()' member functions}} + asm(("")); // expected-error {{the expression in this asm operand must be a null terminated constant string or an object with 'data()' and 'size()' member functions}} asm((NotAString{})); // expected-error {{the string object in this asm operand is missing 'data()' and 'size()' member functions}} asm((MessageInvalidData{})); // expected-error {{the expression in this asm operand must have a 'data()' member function returning an object convertible to 'const char *'}} \ // expected-error {{too few arguments to function call, expected 1, have 0}} @@ -106,7 +106,7 @@ void test_dependent1(int i) { template void test_dependent1<int>(int); // expected-note@-1 {{in instantiation of function template specialization}} -// expected-error@#err-int {{the expression in this asm operand must be a string literal or an object with 'data()' and 'size()' member functions}} +// expected-error@#err-int {{the expression in this asm operand must be a null terminated constant string or an object with 'data()' and 'size()' member functions}} // expected-error@#err-int2 {{cannot initialize a value of type 'int' with an lvalue of type 'const char[3]'}} // expected-error@#err-int3 {{cannot initialize a value of type 'int' with an lvalue of type 'const char[2]'}} // expected-error@#err-int4 {{cannot initialize a value of type 'int' with an lvalue of type 'const char[7]'}} diff --git a/clang/test/SemaCXX/static-assert-cxx26.cpp b/clang/test/SemaCXX/static-assert-cxx26.cpp index b2ebd2abb785e..79be55610b27c 100644 --- a/clang/test/SemaCXX/static-assert-cxx26.cpp +++ b/clang/test/SemaCXX/static-assert-cxx26.cpp @@ -2,7 +2,7 @@ // RUN: %clang_cc1 -std=c++2c -triple=x86_64-linux -fsyntax-only %s -verify -fexperimental-new-constant-interpreter static_assert(true, ""); -static_assert(true, 0); // expected-error {{the message in a static assertion must be a string literal or an object with 'data()' and 'size()' member functions}} +static_assert(true, 0); // expected-error {{the message in a static assertion must be a null terminated constant string or an object with 'data()' and 'size()' member functions}} struct Empty{}; static_assert(true, Empty{}); // expected-error {{the message object in this static assertion is missing 'data()' and 'size()' member functions}} struct NoData { @@ -288,7 +288,7 @@ struct Good { template <typename Ty> struct Bad { - static_assert(false, Ty{}); // expected-error {{the message in a static assertion must be a string literal or an object with 'data()' and 'size()' member functions}} \ + static_assert(false, Ty{}); // expected-error {{the message in a static assertion must be a null terminated constant string or an object with 'data()' and 'size()' member functions}} \ // expected-error {{static assertion failed}} }; @@ -416,3 +416,50 @@ static_assert( // expected-note@-1 {{read of dereferenced one-past-the-end pointer is not allowed in a constant expression}} ); } + +static_assert(false, &(" basic test"[1])); +// expected-error@-1 {{static assertion failed: basic test}} +// expected-warning@-2 {{consteval string constants are an extension}} + +constexpr const char *constexpr_global = "global_constexpr"; +constexpr const char null_terminated_buffer[] = { 'n', 'u', 'l', 'l', 't', 'e', 'r', 'm', 0 }; +constexpr const char no_null_buffer[] = { 'n', 'o', 'n', 'u', 'l', 'l', 't', 'e', 'r', 'm' }; + +constexpr const char *selector(int i) { + constexpr const char * a_constant = "a_constant"; + const char *non_constexpr = "non-constexpr string"; + switch (i) { + case 0: return "case 0"; + case 1: return a_constant; + case 2: return constexpr_global; + case 3: return null_terminated_buffer; + case 4: return &(""[1]); // point to after the null terminator + case 5: return nullptr; + case 6: return no_null_buffer; + } +}; + +static_assert(false, selector(0)); +// expected-error@-1 {{static assertion failed: case 0}} +// expected-warning@-2 {{consteval string constants are an extension}} +static_assert(false, selector(1)); +// expected-error@-1 {{static assertion failed: a_constant}} +// expected-warning@-2 {{consteval string constants are an extension}} +static_assert(false, selector(2)); +// expected-error@-1 {{static assertion failed: global_constexpr}} +// expected-warning@-2 {{consteval string constants are an extension}} +static_assert(false, selector(3)); +// expected-error@-1 {{static assertion failed: nullterm}} +// expected-warning@-2 {{consteval string constants are an extension}} +static_assert(false, selector(4)); +// expected-error@-1 {{the message in a static assertion is not null terminated}} +// expected-error@-2 {{static assertion failed}} +static_assert(false, selector(5)); +// expected-error@-1 {{the message in a static assertion is not null terminated}} +// expected-error@-2 {{static assertion failed}} +static_assert(false, selector(6)); +// expected-error@-1 {{the message in a static assertion is not null terminated}} +// expected-error@-2 {{static assertion failed}} +static_assert(false, selector(7)); +// expected-error@-1 {{the message in a static assertion is not null terminated}} +// expected-error@-2 {{static assertion failed}} _______________________________________________ cfe-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
