https://github.com/ojhunt created 
https://github.com/llvm/llvm-project/pull/173140

Adds support for null-terminated strings produced by constexpr evaluation. This 
makes it possible to analyze format strings that previously could not be 
analyzed, and will be needed in the future to support __ptrauth qualifier 
options.

>From 0c70ec6ff2b92ba08a9a5a619b559cdc5fd6e7a1 Mon Sep 17 00:00:00 2001
From: Oliver Hunt <[email protected]>
Date: Fri, 19 Dec 2025 20:33:17 -0800
Subject: [PATCH] [clang] Add support for consteval null terminated strings

Adds support for null-terminated strings produced by constexpr
evaluation. This makes it possible to analyze format strings that
previously could not be analyzed, and will be needed in the future
to support __ptrauth qualifier options.
---
 .../clang/Basic/DiagnosticSemaKinds.td        | 17 ++++---
 clang/include/clang/Sema/Sema.h               |  9 ++--
 clang/lib/AST/ByteCode/Context.cpp            |  8 +--
 clang/lib/Sema/SemaDeclCXX.cpp                | 42 +++++++++++++++
 clang/test/Parser/asm.cpp                     | 29 ++++++++++-
 clang/test/SemaCXX/gnu-asm-constexpr.cpp      |  4 +-
 clang/test/SemaCXX/static-assert-cxx26.cpp    | 51 ++++++++++++++++++-
 7 files changed, 142 insertions(+), 18 deletions(-)

diff --git a/clang/include/clang/Basic/DiagnosticSemaKinds.td 
b/clang/include/clang/Basic/DiagnosticSemaKinds.td
index 51b6eba965103..9388ada755606 100644
--- a/clang/include/clang/Basic/DiagnosticSemaKinds.td
+++ b/clang/include/clang/Basic/DiagnosticSemaKinds.td
@@ -137,14 +137,14 @@ def 
err_typecheck_converted_constant_expression_disallowed : Error<
 def err_typecheck_converted_constant_expression_indirect : Error<
   "conversion from %0 to %1 in converted constant expression would "
   "bind reference to a temporary">;
-def err_expr_not_cce : Error<
+  def subst_cce_desc : TextSubstitution<
   "%select{case value|enumerator value|non-type template argument|non-type 
parameter of template template parameter|"
   "array size|explicit specifier argument|noexcept specifier argument|"
-  "call to 'size()'|call to 'data()'}0 is not a constant expression">;
+  "call to 'size()'|call to 'data()'|null terminated consteval string}0">;
+def err_expr_not_cce : Error<
+  "%sub{subst_cce_desc}0 is not a constant expression">;
 def ext_cce_narrowing : ExtWarn<
-  "%select{case value|enumerator value|non-type template argument|non-type 
parameter of template template parameter|"
-  "array size|explicit specifier argument|noexcept specifier argument|"
-  "call to 'size()'|call to 'data()'}0 %select{cannot be narrowed from "
+  "%sub{subst_cce_desc}0 %select{cannot be narrowed from "
   "type %2 to %3|evaluates to %2, which cannot be narrowed to type %3}1">,
   InGroup<CXX11Narrowing>, DefaultError, SFINAEFailure;
 def err_ice_not_integral : Error<
@@ -1747,7 +1747,7 @@ def subst_user_defined_msg : TextSubstitution<
   "%select{a static assertion|this asm operand}0">;
 
 def err_user_defined_msg_invalid : Error<
-  "%sub{subst_user_defined_msg}0 must be a string literal or an "
+  "%sub{subst_user_defined_msg}0 must be a null terminated constant string or 
an "
   "object with 'data()' and 'size()' member functions">;
 def err_user_defined_msg_missing_member_function : Error<
   "the %select{message|string}0 object in "
@@ -1755,6 +1755,11 @@ def err_user_defined_msg_missing_member_function : Error<
   "a 'size()' member function|"
   "a 'data()' member function|"
   "'data()' and 'size()' member functions}1">;
+def err_user_defined_msg_not_null_terminated_string : Error<
+  "%sub{subst_user_defined_msg}0 is not null terminated">;
+def ext_consteval_string_constants : Extension<
+  "consteval string constants are an extension">, DefaultWarn,
+  InGroup<DiagGroup<"consteval-string-constants-extension">>;
 def err_user_defined_msg_invalid_mem_fn_ret_ty : Error<
   "%sub{subst_user_defined_msg}0 must have a '%select{size|data}1()' member "
   "function returning an object convertible to '%select{std::size_t|const char 
*}1'">;
diff --git a/clang/include/clang/Sema/Sema.h b/clang/include/clang/Sema/Sema.h
index 91b07aa500b86..9d00aa5edebc9 100644
--- a/clang/include/clang/Sema/Sema.h
+++ b/clang/include/clang/Sema/Sema.h
@@ -831,10 +831,11 @@ enum class CCEKind {
   ArrayBound,    ///< Array bound in array declarator or new-expression.
   ExplicitBool,  ///< Condition in an explicit(bool) specifier.
   Noexcept,      ///< Condition in a noexcept(bool) specifier.
-  StaticAssertMessageSize, ///< Call to size() in a static assert
-                           ///< message.
-  StaticAssertMessageData, ///< Call to data() in a static assert
-                           ///< message.
+  StaticAssertMessageSize,          ///< Call to size() in a static assert
+                                    ///< message.
+  StaticAssertMessageData,          ///< Call to data() in a static assert
+                                    ///< message.
+  StaticAssertNullTerminatedString, ///< tryEvaluateStrLen
 };
 
 /// Enums for the diagnostics of target, target_version and target_clones.
diff --git a/clang/lib/AST/ByteCode/Context.cpp 
b/clang/lib/AST/ByteCode/Context.cpp
index 74ec986e49ca7..208fcb2a2732e 100644
--- a/clang/lib/AST/ByteCode/Context.cpp
+++ b/clang/lib/AST/ByteCode/Context.cpp
@@ -294,13 +294,15 @@ bool Context::evaluateStrlen(State &Parent, const Expr 
*E, uint64_t &Result) {
     if (!FieldDesc->isPrimitiveArray())
       return false;
 
-    if (Ptr.isDummy() || Ptr.isUnknownSizeArray())
+    if (Ptr.isDummy() || Ptr.isUnknownSizeArray() || Ptr.isPastEnd())
       return false;
 
     unsigned N = Ptr.getNumElems();
     if (Ptr.elemSize() == 1) {
-      Result = strnlen(reinterpret_cast<const char *>(Ptr.getRawAddress()), N);
-      return Result != N;
+      unsigned Size = N - Ptr.getIndex();
+      Result =
+          strnlen(reinterpret_cast<const char *>(Ptr.getRawAddress()), Size);
+      return Result != Size;
     }
 
     PrimType ElemT = FieldDesc->getPrimType();
diff --git a/clang/lib/Sema/SemaDeclCXX.cpp b/clang/lib/Sema/SemaDeclCXX.cpp
index 4da431f19acec..45cdf9563235c 100644
--- a/clang/lib/Sema/SemaDeclCXX.cpp
+++ b/clang/lib/Sema/SemaDeclCXX.cpp
@@ -17693,6 +17693,44 @@ void Sema::DiagnoseStaticAssertDetails(const Expr *E) {
   }
 }
 
+template <typename ResultType>
+static bool EvaluateAsNullTerminatedCharBuffer(
+    Sema &SemaRef, Expr *Message, ResultType &Result, ASTContext &Ctx,
+    Sema::StringEvaluationContext EvalContext, bool ErrorOnInvalidMessage) {
+  SourceLocation Loc = Message->getBeginLoc();
+  QualType SizeT = Ctx.getSizeType();
+  QualType ConstCharPtr = Ctx.getPointerType(Ctx.getConstType(Ctx.CharTy));
+  Expr::EvalResult Status;
+  SmallVector<PartialDiagnosticAt, 8> Notes;
+  Status.Diag = &Notes;
+
+  auto DiagnoseInvalidConstantString = [&]() {
+    SemaRef.Diag(Loc, diag::err_user_defined_msg_not_null_terminated_string)
+        << EvalContext;
+    for (const auto &Note : Notes)
+      SemaRef.Diag(Note.first, Note.second);
+    return !ErrorOnInvalidMessage;
+  };
+  ExprResult EvaluatedData = SemaRef.BuildConvertedConstantExpression(
+      Message, ConstCharPtr, CCEKind::StaticAssertNullTerminatedString);
+  if (EvaluatedData.isInvalid())
+    return DiagnoseInvalidConstantString();
+
+  uint64_t Length = 0;
+  if (!EvaluatedData.get()->tryEvaluateStrLen(Length, Ctx))
+    return DiagnoseInvalidConstantString();
+
+  llvm::APInt SizeVal(Ctx.getIntWidth(SizeT), Length);
+  Expr *SizeExpr = IntegerLiteral::Create(Ctx, SizeVal, SizeT, Loc);
+
+  bool EvalResult = Message->EvaluateCharRangeAsString(
+      Result, SizeExpr, EvaluatedData.get(), Ctx, Status);
+  if (!EvalResult || !Notes.empty())
+    return DiagnoseInvalidConstantString();
+  SemaRef.Diag(Loc, diag::ext_consteval_string_constants);
+  return true;
+}
+
 template <typename ResultType>
 static bool EvaluateAsStringImpl(Sema &SemaRef, Expr *Message,
                                  ResultType &Result, ASTContext &Ctx,
@@ -17726,6 +17764,10 @@ static bool EvaluateAsStringImpl(Sema &SemaRef, Expr 
*Message,
 
   SourceLocation Loc = Message->getBeginLoc();
   QualType T = Message->getType().getNonReferenceType();
+  if (T->isPointerType() && T->getPointeeType()->isCharType())
+    return EvaluateAsNullTerminatedCharBuffer(
+        SemaRef, Message, Result, Ctx, EvalContext, ErrorOnInvalidMessage);
+
   auto *RD = T->getAsCXXRecordDecl();
   if (!RD) {
     SemaRef.Diag(Loc, diag::err_user_defined_msg_invalid) << EvalContext;
diff --git a/clang/test/Parser/asm.cpp b/clang/test/Parser/asm.cpp
index cf23b6f42a077..517af5e777ce6 100644
--- a/clang/test/Parser/asm.cpp
+++ b/clang/test/Parser/asm.cpp
@@ -35,6 +35,9 @@ struct string_view {
 int foo1 asm ((string_view("test"))); // expected-error {{expected string 
literal in 'asm'}}
 int func() asm ((string_view("test"))); // expected-error {{expected string 
literal in 'asm'}}
 
+constexpr const char* getConstantString(const char* s) {
+  return s;
+}
 
 void f2() {
   asm(string_view("")); // expected-error {{expected string literal or 
parenthesized constant expression in 'asm'}}
@@ -44,6 +47,13 @@ void f2() {
   asm("" :: string_view("")); // expected-error {{expected string literal or 
parenthesized constant expression in 'asm'}}
   asm(::string_view("")); // expected-error {{expected string literal or 
parenthesized constant expression in 'asm'}}
 
+  asm(getConstantString("")); // expected-error {{expected string literal or 
parenthesized constant expression in 'asm'}}
+  asm("" : getConstantString("")); // expected-error {{expected string literal 
or parenthesized constant expression in 'asm'}}
+  asm("" : : getConstantString("")); // expected-error {{expected string 
literal or parenthesized constant expression in 'asm'}}
+  asm("" : : : getConstantString("")); // expected-error {{expected ')'}}
+  asm("" :: getConstantString("")); // expected-error {{expected string 
literal or parenthesized constant expression in 'asm'}}
+  asm(::getConstantString("")); // expected-error {{expected string literal or 
parenthesized constant expression in 'asm'}}
+
   int i;
 
   asm((string_view("")));
@@ -55,5 +65,22 @@ void f2() {
   asm("" : (::string_view("+g")) (i) : (::string_view("g")) (0) : 
(string_view("memory")));
 
 
-  asm((0)); // expected-error {{the expression in this asm operand must be a 
string literal or an object with 'data()' and 'size()' member functions}}
+  asm((getConstantString("")));
+  // expected-warning@-1 {{consteval string constants are an extension}}
+  asm((::getConstantString("")));
+  // expected-warning@-1 {{consteval string constants are an extension}}
+  asm("" : (::getConstantString("+g")) (i));
+  // expected-warning@-1 {{consteval string constants are an extension}}
+  asm("" : (::getConstantString("+g"))); // expected-error {{expected '(' 
after 'asm operand'}}
+  // expected-warning@-1 {{consteval string constants are an extension}}
+  asm("" : (::getConstantString("+g")) (i) : (::getConstantString("g")) (0));
+  // expected-warning@-1 2 {{consteval string constants are an extension}}
+  asm("" : (::getConstantString("+g")) (i) : (::getConstantString("g"))); // 
expected-error {{expected '(' after 'asm operand'}}
+  // expected-warning@-1 2 {{consteval string constants are an extension}}
+  asm("" : (::getConstantString("+g")) (i) : (::getConstantString("g")) (0) : 
(getConstantString("memory")));
+  // expected-warning@-1 3 {{consteval string constants are an extension}}
+
+
+
+  asm((0)); // expected-error {{the expression in this asm operand must be a 
null terminated constant string or an object with 'data()' and 'size()' member 
functions}}
 }
diff --git a/clang/test/SemaCXX/gnu-asm-constexpr.cpp 
b/clang/test/SemaCXX/gnu-asm-constexpr.cpp
index 77466df12bdc1..f1d2862a62918 100644
--- a/clang/test/SemaCXX/gnu-asm-constexpr.cpp
+++ b/clang/test/SemaCXX/gnu-asm-constexpr.cpp
@@ -77,7 +77,7 @@ struct string_view {
 
 
 void f() {
-    asm(("")); // expected-error {{the expression in this asm operand must be 
a string literal or an object with 'data()' and 'size()' member functions}}
+    asm(("")); // expected-error {{the expression in this asm operand must be 
a null terminated constant string or an object with 'data()' and 'size()' 
member functions}}
     asm((NotAString{})); // expected-error {{the string object in this asm 
operand is missing 'data()' and 'size()' member functions}}
     asm((MessageInvalidData{})); // expected-error {{the expression in this 
asm operand must have a 'data()' member function returning an object 
convertible to 'const char *'}} \
                                  // expected-error {{too few arguments to 
function call, expected 1, have 0}}
@@ -106,7 +106,7 @@ void test_dependent1(int i) {
 
 template void test_dependent1<int>(int);
 // expected-note@-1 {{in instantiation of function template specialization}}
-// expected-error@#err-int {{the expression in this asm operand must be a 
string literal or an object with 'data()' and 'size()' member functions}}
+// expected-error@#err-int {{the expression in this asm operand must be a null 
terminated constant string or an object with 'data()' and 'size()' member 
functions}}
 // expected-error@#err-int2 {{cannot initialize a value of type 'int' with an 
lvalue of type 'const char[3]'}}
 // expected-error@#err-int3 {{cannot initialize a value of type 'int' with an 
lvalue of type 'const char[2]'}}
 // expected-error@#err-int4 {{cannot initialize a value of type 'int' with an 
lvalue of type 'const char[7]'}}
diff --git a/clang/test/SemaCXX/static-assert-cxx26.cpp 
b/clang/test/SemaCXX/static-assert-cxx26.cpp
index b2ebd2abb785e..79be55610b27c 100644
--- a/clang/test/SemaCXX/static-assert-cxx26.cpp
+++ b/clang/test/SemaCXX/static-assert-cxx26.cpp
@@ -2,7 +2,7 @@
 // RUN: %clang_cc1 -std=c++2c -triple=x86_64-linux -fsyntax-only %s -verify 
-fexperimental-new-constant-interpreter
 
 static_assert(true, "");
-static_assert(true, 0); // expected-error {{the message in a static assertion 
must be a string literal or an object with 'data()' and 'size()' member 
functions}}
+static_assert(true, 0); // expected-error {{the message in a static assertion 
must be a null terminated constant string or an object with 'data()' and 
'size()' member functions}}
 struct Empty{};
 static_assert(true, Empty{}); // expected-error {{the message object in this 
static assertion is missing 'data()' and 'size()' member functions}}
 struct NoData {
@@ -288,7 +288,7 @@ struct Good {
 
 template <typename Ty>
 struct Bad {
-  static_assert(false, Ty{}); // expected-error {{the message in a static 
assertion must be a string literal or an object with 'data()' and 'size()' 
member functions}} \
+  static_assert(false, Ty{}); // expected-error {{the message in a static 
assertion must be a null terminated constant string or an object with 'data()' 
and 'size()' member functions}} \
                               // expected-error {{static assertion failed}}
 };
 
@@ -416,3 +416,50 @@ static_assert(
       // expected-note@-1 {{read of dereferenced one-past-the-end pointer is 
not allowed in a constant expression}}
 );
 }
+
+static_assert(false, &(" basic test"[1]));
+// expected-error@-1 {{static assertion failed: basic test}}
+// expected-warning@-2 {{consteval string constants are an extension}}
+
+constexpr const char *constexpr_global = "global_constexpr";
+constexpr const char null_terminated_buffer[] = { 'n', 'u', 'l', 'l', 't', 
'e', 'r', 'm', 0 };
+constexpr const char no_null_buffer[] = { 'n', 'o', 'n', 'u', 'l', 'l', 't', 
'e', 'r', 'm' };
+
+constexpr const char *selector(int i) {
+  constexpr const char * a_constant = "a_constant";
+  const char *non_constexpr = "non-constexpr string";
+  switch (i) {
+    case 0: return "case 0";
+    case 1: return a_constant;
+    case 2: return constexpr_global;
+    case 3: return null_terminated_buffer;
+    case 4: return &(""[1]); // point to after the null terminator
+    case 5: return nullptr;
+    case 6: return no_null_buffer;
+  }
+};
+
+static_assert(false, selector(0));
+// expected-error@-1 {{static assertion failed: case 0}}
+// expected-warning@-2 {{consteval string constants are an extension}}
+static_assert(false, selector(1));
+// expected-error@-1 {{static assertion failed: a_constant}}
+// expected-warning@-2 {{consteval string constants are an extension}}
+static_assert(false, selector(2));
+// expected-error@-1 {{static assertion failed: global_constexpr}}
+// expected-warning@-2 {{consteval string constants are an extension}}
+static_assert(false, selector(3));
+// expected-error@-1 {{static assertion failed: nullterm}}
+// expected-warning@-2 {{consteval string constants are an extension}}
+static_assert(false, selector(4));
+// expected-error@-1 {{the message in a static assertion is not null 
terminated}}
+// expected-error@-2 {{static assertion failed}}
+static_assert(false, selector(5));
+// expected-error@-1 {{the message in a static assertion is not null 
terminated}}
+// expected-error@-2 {{static assertion failed}}
+static_assert(false, selector(6));
+// expected-error@-1 {{the message in a static assertion is not null 
terminated}}
+// expected-error@-2 {{static assertion failed}}
+static_assert(false, selector(7));
+// expected-error@-1 {{the message in a static assertion is not null 
terminated}}
+// expected-error@-2 {{static assertion failed}}

_______________________________________________
cfe-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to