https://github.com/ojhunt created https://github.com/llvm/llvm-project/pull/173140
Adds support for null-terminated strings produced by constexpr evaluation. This makes it possible to analyze format strings that previously could not be analyzed, and will be needed in the future to support __ptrauth qualifier options. >From 0c70ec6ff2b92ba08a9a5a619b559cdc5fd6e7a1 Mon Sep 17 00:00:00 2001 From: Oliver Hunt <[email protected]> Date: Fri, 19 Dec 2025 20:33:17 -0800 Subject: [PATCH] [clang] Add support for consteval null terminated strings Adds support for null terminated strings produced by constexpr evaluation. This makes it possible to perform analysis of format strings that previously were not possible, and is needed in the future to support __ptrauth qualifier options. --- .../clang/Basic/DiagnosticSemaKinds.td | 17 ++++--- clang/include/clang/Sema/Sema.h | 9 ++-- clang/lib/AST/ByteCode/Context.cpp | 8 +-- clang/lib/Sema/SemaDeclCXX.cpp | 42 +++++++++++++++ clang/test/Parser/asm.cpp | 29 ++++++++++- clang/test/SemaCXX/gnu-asm-constexpr.cpp | 4 +- clang/test/SemaCXX/static-assert-cxx26.cpp | 51 ++++++++++++++++++- 7 files changed, 142 insertions(+), 18 deletions(-) diff --git a/clang/include/clang/Basic/DiagnosticSemaKinds.td b/clang/include/clang/Basic/DiagnosticSemaKinds.td index 51b6eba965103..9388ada755606 100644 --- a/clang/include/clang/Basic/DiagnosticSemaKinds.td +++ b/clang/include/clang/Basic/DiagnosticSemaKinds.td @@ -137,14 +137,14 @@ def err_typecheck_converted_constant_expression_disallowed : Error< def err_typecheck_converted_constant_expression_indirect : Error< "conversion from %0 to %1 in converted constant expression would " "bind reference to a temporary">; -def err_expr_not_cce : Error< + def subst_cce_desc : TextSubstitution< "%select{case value|enumerator value|non-type template argument|non-type parameter of template template parameter|" "array size|explicit specifier argument|noexcept specifier argument|" - "call to 'size()'|call to 'data()'}0 is not a constant expression">; + "call to 'size()'|call to 'data()'|null 
terminated consteval string}0">; +def err_expr_not_cce : Error< + "%sub{subst_cce_desc}0 is not a constant expression">; def ext_cce_narrowing : ExtWarn< - "%select{case value|enumerator value|non-type template argument|non-type parameter of template template parameter|" - "array size|explicit specifier argument|noexcept specifier argument|" - "call to 'size()'|call to 'data()'}0 %select{cannot be narrowed from " + "%sub{subst_cce_desc}0 %select{cannot be narrowed from " "type %2 to %3|evaluates to %2, which cannot be narrowed to type %3}1">, InGroup<CXX11Narrowing>, DefaultError, SFINAEFailure; def err_ice_not_integral : Error< @@ -1747,7 +1747,7 @@ def subst_user_defined_msg : TextSubstitution< "%select{a static assertion|this asm operand}0">; def err_user_defined_msg_invalid : Error< - "%sub{subst_user_defined_msg}0 must be a string literal or an " + "%sub{subst_user_defined_msg}0 must be a null terminated constant string or an " "object with 'data()' and 'size()' member functions">; def err_user_defined_msg_missing_member_function : Error< "the %select{message|string}0 object in " @@ -1755,6 +1755,11 @@ def err_user_defined_msg_missing_member_function : Error< "a 'size()' member function|" "a 'data()' member function|" "'data()' and 'size()' member functions}1">; +def err_user_defined_msg_not_null_terminated_string : Error< + "%sub{subst_user_defined_msg}0 is not null terminated">; +def ext_consteval_string_constants : Extension< + "consteval string constants are an extension">, DefaultWarn, + InGroup<DiagGroup<"consteval-string-constants-extension">>; def err_user_defined_msg_invalid_mem_fn_ret_ty : Error< "%sub{subst_user_defined_msg}0 must have a '%select{size|data}1()' member " "function returning an object convertible to '%select{std::size_t|const char *}1'">; diff --git a/clang/include/clang/Sema/Sema.h b/clang/include/clang/Sema/Sema.h index 91b07aa500b86..9d00aa5edebc9 100644 --- a/clang/include/clang/Sema/Sema.h +++ b/clang/include/clang/Sema/Sema.h @@ 
-831,10 +831,11 @@ enum class CCEKind { ArrayBound, ///< Array bound in array declarator or new-expression. ExplicitBool, ///< Condition in an explicit(bool) specifier. Noexcept, ///< Condition in a noexcept(bool) specifier. - StaticAssertMessageSize, ///< Call to size() in a static assert - ///< message. - StaticAssertMessageData, ///< Call to data() in a static assert - ///< message. + StaticAssertMessageSize, ///< Call to size() in a static assert + ///< message. + StaticAssertMessageData, ///< Call to data() in a static assert + ///< message. + StaticAssertNullTerminatedString, ///< tryEvaluateStrLen }; /// Enums for the diagnostics of target, target_version and target_clones. diff --git a/clang/lib/AST/ByteCode/Context.cpp b/clang/lib/AST/ByteCode/Context.cpp index 74ec986e49ca7..208fcb2a2732e 100644 --- a/clang/lib/AST/ByteCode/Context.cpp +++ b/clang/lib/AST/ByteCode/Context.cpp @@ -294,13 +294,15 @@ bool Context::evaluateStrlen(State &Parent, const Expr *E, uint64_t &Result) { if (!FieldDesc->isPrimitiveArray()) return false; - if (Ptr.isDummy() || Ptr.isUnknownSizeArray()) + if (Ptr.isDummy() || Ptr.isUnknownSizeArray() || Ptr.isPastEnd()) return false; unsigned N = Ptr.getNumElems(); if (Ptr.elemSize() == 1) { - Result = strnlen(reinterpret_cast<const char *>(Ptr.getRawAddress()), N); - return Result != N; + unsigned Size = N - Ptr.getIndex(); + Result = + strnlen(reinterpret_cast<const char *>(Ptr.getRawAddress()), Size); + return Result != Size; } PrimType ElemT = FieldDesc->getPrimType(); diff --git a/clang/lib/Sema/SemaDeclCXX.cpp b/clang/lib/Sema/SemaDeclCXX.cpp index 4da431f19acec..45cdf9563235c 100644 --- a/clang/lib/Sema/SemaDeclCXX.cpp +++ b/clang/lib/Sema/SemaDeclCXX.cpp @@ -17693,6 +17693,44 @@ void Sema::DiagnoseStaticAssertDetails(const Expr *E) { } } +template <typename ResultType> +static bool EvaluateAsNullTerminatedCharBuffer( + Sema &SemaRef, Expr *Message, ResultType &Result, ASTContext &Ctx, + Sema::StringEvaluationContext EvalContext, 
bool ErrorOnInvalidMessage) { + SourceLocation Loc = Message->getBeginLoc(); + QualType SizeT = Ctx.getSizeType(); + QualType ConstCharPtr = Ctx.getPointerType(Ctx.getConstType(Ctx.CharTy)); + Expr::EvalResult Status; + SmallVector<PartialDiagnosticAt, 8> Notes; + Status.Diag = &Notes; + + auto DiagnoseInvalidConstantString = [&]() { + SemaRef.Diag(Loc, diag::err_user_defined_msg_not_null_terminated_string) + << EvalContext; + for (const auto &Note : Notes) + SemaRef.Diag(Note.first, Note.second); + return !ErrorOnInvalidMessage; + }; + ExprResult EvaluatedData = SemaRef.BuildConvertedConstantExpression( + Message, ConstCharPtr, CCEKind::StaticAssertNullTerminatedString); + if (EvaluatedData.isInvalid()) + return DiagnoseInvalidConstantString(); + + uint64_t Length = 0; + if (!EvaluatedData.get()->tryEvaluateStrLen(Length, Ctx)) + return DiagnoseInvalidConstantString(); + + llvm::APInt SizeVal(Ctx.getIntWidth(SizeT), Length); + Expr *SizeExpr = IntegerLiteral::Create(Ctx, SizeVal, SizeT, Loc); + + bool EvalResult = Message->EvaluateCharRangeAsString( + Result, SizeExpr, EvaluatedData.get(), Ctx, Status); + if (!EvalResult || !Notes.empty()) + return DiagnoseInvalidConstantString(); + SemaRef.Diag(Loc, diag::ext_consteval_string_constants); + return true; +} + template <typename ResultType> static bool EvaluateAsStringImpl(Sema &SemaRef, Expr *Message, ResultType &Result, ASTContext &Ctx, @@ -17726,6 +17764,10 @@ static bool EvaluateAsStringImpl(Sema &SemaRef, Expr *Message, SourceLocation Loc = Message->getBeginLoc(); QualType T = Message->getType().getNonReferenceType(); + if (T->isPointerType() && T->getPointeeType()->isCharType()) + return EvaluateAsNullTerminatedCharBuffer( + SemaRef, Message, Result, Ctx, EvalContext, ErrorOnInvalidMessage); + auto *RD = T->getAsCXXRecordDecl(); if (!RD) { SemaRef.Diag(Loc, diag::err_user_defined_msg_invalid) << EvalContext; diff --git a/clang/test/Parser/asm.cpp b/clang/test/Parser/asm.cpp index cf23b6f42a077..517af5e777ce6 
100644 --- a/clang/test/Parser/asm.cpp +++ b/clang/test/Parser/asm.cpp @@ -35,6 +35,9 @@ struct string_view { int foo1 asm ((string_view("test"))); // expected-error {{expected string literal in 'asm'}} int func() asm ((string_view("test"))); // expected-error {{expected string literal in 'asm'}} +constexpr const char* getConstantString(const char* s) { + return s; +} void f2() { asm(string_view("")); // expected-error {{expected string literal or parenthesized constant expression in 'asm'}} @@ -44,6 +47,13 @@ void f2() { asm("" :: string_view("")); // expected-error {{expected string literal or parenthesized constant expression in 'asm'}} asm(::string_view("")); // expected-error {{expected string literal or parenthesized constant expression in 'asm'}} + asm(getConstantString("")); // expected-error {{expected string literal or parenthesized constant expression in 'asm'}} + asm("" : getConstantString("")); // expected-error {{expected string literal or parenthesized constant expression in 'asm'}} + asm("" : : getConstantString("")); // expected-error {{expected string literal or parenthesized constant expression in 'asm'}} + asm("" : : : getConstantString("")); // expected-error {{expected ')'}} + asm("" :: getConstantString("")); // expected-error {{expected string literal or parenthesized constant expression in 'asm'}} + asm(::getConstantString("")); // expected-error {{expected string literal or parenthesized constant expression in 'asm'}} + int i; asm((string_view(""))); @@ -55,5 +65,22 @@ void f2() { asm("" : (::string_view("+g")) (i) : (::string_view("g")) (0) : (string_view("memory"))); - asm((0)); // expected-error {{the expression in this asm operand must be a string literal or an object with 'data()' and 'size()' member functions}} + asm((getConstantString(""))); + // expected-warning@-1 {{consteval string constants are an extension}} + asm((::getConstantString(""))); + // expected-warning@-1 {{consteval string constants are an extension}} + asm("" : 
(::getConstantString("+g")) (i)); + // expected-warning@-1 {{consteval string constants are an extension}} + asm("" : (::getConstantString("+g"))); // expected-error {{expected '(' after 'asm operand'}} + // expected-warning@-1 {{consteval string constants are an extension}} + asm("" : (::getConstantString("+g")) (i) : (::getConstantString("g")) (0)); + // expected-warning@-1 2 {{consteval string constants are an extension}} + asm("" : (::getConstantString("+g")) (i) : (::getConstantString("g"))); // expected-error {{expected '(' after 'asm operand'}} + // expected-warning@-1 2 {{consteval string constants are an extension}} + asm("" : (::getConstantString("+g")) (i) : (::getConstantString("g")) (0) : (getConstantString("memory"))); + // expected-warning@-1 3 {{consteval string constants are an extension}} + + + + asm((0)); // expected-error {{the expression in this asm operand must be a null terminated constant string or an object with 'data()' and 'size()' member functions}} } diff --git a/clang/test/SemaCXX/gnu-asm-constexpr.cpp b/clang/test/SemaCXX/gnu-asm-constexpr.cpp index 77466df12bdc1..f1d2862a62918 100644 --- a/clang/test/SemaCXX/gnu-asm-constexpr.cpp +++ b/clang/test/SemaCXX/gnu-asm-constexpr.cpp @@ -77,7 +77,7 @@ struct string_view { void f() { - asm(("")); // expected-error {{the expression in this asm operand must be a string literal or an object with 'data()' and 'size()' member functions}} + asm(("")); // expected-error {{the expression in this asm operand must be a null terminated constant string or an object with 'data()' and 'size()' member functions}} asm((NotAString{})); // expected-error {{the string object in this asm operand is missing 'data()' and 'size()' member functions}} asm((MessageInvalidData{})); // expected-error {{the expression in this asm operand must have a 'data()' member function returning an object convertible to 'const char *'}} \ // expected-error {{too few arguments to function call, expected 1, have 0}} @@ -106,7 +106,7 
@@ void test_dependent1(int i) { template void test_dependent1<int>(int); // expected-note@-1 {{in instantiation of function template specialization}} -// expected-error@#err-int {{the expression in this asm operand must be a string literal or an object with 'data()' and 'size()' member functions}} +// expected-error@#err-int {{the expression in this asm operand must be a null terminated constant string or an object with 'data()' and 'size()' member functions}} // expected-error@#err-int2 {{cannot initialize a value of type 'int' with an lvalue of type 'const char[3]'}} // expected-error@#err-int3 {{cannot initialize a value of type 'int' with an lvalue of type 'const char[2]'}} // expected-error@#err-int4 {{cannot initialize a value of type 'int' with an lvalue of type 'const char[7]'}} diff --git a/clang/test/SemaCXX/static-assert-cxx26.cpp b/clang/test/SemaCXX/static-assert-cxx26.cpp index b2ebd2abb785e..79be55610b27c 100644 --- a/clang/test/SemaCXX/static-assert-cxx26.cpp +++ b/clang/test/SemaCXX/static-assert-cxx26.cpp @@ -2,7 +2,7 @@ // RUN: %clang_cc1 -std=c++2c -triple=x86_64-linux -fsyntax-only %s -verify -fexperimental-new-constant-interpreter static_assert(true, ""); -static_assert(true, 0); // expected-error {{the message in a static assertion must be a string literal or an object with 'data()' and 'size()' member functions}} +static_assert(true, 0); // expected-error {{the message in a static assertion must be a null terminated constant string or an object with 'data()' and 'size()' member functions}} struct Empty{}; static_assert(true, Empty{}); // expected-error {{the message object in this static assertion is missing 'data()' and 'size()' member functions}} struct NoData { @@ -288,7 +288,7 @@ struct Good { template <typename Ty> struct Bad { - static_assert(false, Ty{}); // expected-error {{the message in a static assertion must be a string literal or an object with 'data()' and 'size()' member functions}} \ + static_assert(false, Ty{}); // 
expected-error {{the message in a static assertion must be a null terminated constant string or an object with 'data()' and 'size()' member functions}} \ // expected-error {{static assertion failed}} }; @@ -416,3 +416,50 @@ static_assert( // expected-note@-1 {{read of dereferenced one-past-the-end pointer is not allowed in a constant expression}} ); } + +static_assert(false, &(" basic test"[1])); +// expected-error@-1 {{static assertion failed: basic test}} +// expected-warning@-2 {{consteval string constants are an extension}} + +constexpr const char *constexpr_global = "global_constexpr"; +constexpr const char null_terminated_buffer[] = { 'n', 'u', 'l', 'l', 't', 'e', 'r', 'm', 0 }; +constexpr const char no_null_buffer[] = { 'n', 'o', 'n', 'u', 'l', 'l', 't', 'e', 'r', 'm' }; + +constexpr const char *selector(int i) { + constexpr const char * a_constant = "a_constant"; + const char *non_constexpr = "non-constexpr string"; + switch (i) { + case 0: return "case 0"; + case 1: return a_constant; + case 2: return constexpr_global; + case 3: return null_terminated_buffer; + case 4: return &(""[1]); // point to after the null terminator + case 5: return nullptr; + case 6: return no_null_buffer; + } +}; + +static_assert(false, selector(0)); +// expected-error@-1 {{static assertion failed: case 0}} +// expected-warning@-2 {{consteval string constants are an extension}} +static_assert(false, selector(1)); +// expected-error@-1 {{static assertion failed: a_constant}} +// expected-warning@-2 {{consteval string constants are an extension}} +static_assert(false, selector(2)); +// expected-error@-1 {{static assertion failed: global_constexpr}} +// expected-warning@-2 {{consteval string constants are an extension}} +static_assert(false, selector(3)); +// expected-error@-1 {{static assertion failed: nullterm}} +// expected-warning@-2 {{consteval string constants are an extension}} +static_assert(false, selector(4)); +// expected-error@-1 {{the message in a static assertion is not 
null terminated}} +// expected-error@-2 {{static assertion failed}} +static_assert(false, selector(5)); +// expected-error@-1 {{the message in a static assertion is not null terminated}} +// expected-error@-2 {{static assertion failed}} +static_assert(false, selector(6)); +// expected-error@-1 {{the message in a static assertion is not null terminated}} +// expected-error@-2 {{static assertion failed}} +static_assert(false, selector(7)); +// expected-error@-1 {{the message in a static assertion is not null terminated}} +// expected-error@-2 {{static assertion failed}} _______________________________________________ cfe-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
