https://github.com/AaronBallman updated 
https://github.com/llvm/llvm-project/pull/137829

>From 7ed11d01dfda559f16ee823d6274ada321621466 Mon Sep 17 00:00:00 2001
From: Aaron Ballman <aa...@aaronballman.com>
Date: Tue, 29 Apr 2025 11:43:53 -0400
Subject: [PATCH 1/3] [C] Add diagnostic + attr for unterminated strings

This introduces three things related to intialization like:

char buf[3] = "foo";

where the array does not declare enough space for the null terminator
but otherwise can represent the array contents exactly.

1) An attribute named 'nonstring' which can be used to mark that a
   field or variable is not intended to hold string data.

2) -Wunterminated-string-initialization, which is grouped under
   -Wextra, and diagnoses the above construct unless the declaration
   uses the 'nonstring' attribute.

3) -Wc++-unterminated-string-initialization, which is grouped under
   -Wc++-compat, and diagnoses the above construct even if the
   declaration uses the 'nonstring' attribute.

Fixes #137705
---
 clang/docs/ReleaseNotes.rst                   | 17 ++++++++
 clang/include/clang/Basic/Attr.td             |  6 +++
 clang/include/clang/Basic/AttrDocs.td         | 15 +++++++
 clang/include/clang/Basic/DiagnosticGroups.td |  8 +++-
 .../clang/Basic/DiagnosticSemaKinds.td        | 12 ++++++
 clang/lib/Sema/SemaDeclAttr.cpp               | 17 ++++++++
 clang/lib/Sema/SemaInit.cpp                   | 35 ++++++++++++----
 clang/test/Sema/attr-nonstring.c              | 41 +++++++++++++++++++
 8 files changed, 141 insertions(+), 10 deletions(-)
 create mode 100644 clang/test/Sema/attr-nonstring.c

diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst
index bc68bb8b70b3d..1adb59a970329 100644
--- a/clang/docs/ReleaseNotes.rst
+++ b/clang/docs/ReleaseNotes.rst
@@ -171,6 +171,23 @@ C Language Changes
   ``-Wenum-conversion`` and ``-Wimplicit-int-enum-cast``. This conversion is an
   int-to-enum conversion because the enumeration on the right-hand side is
   promoted to ``int`` before the assignment.
+- Added ``-Wunterminated-string-initialization``, grouped under ``-Wextra``,
+  which diagnoses an initialization from a string literal where only the null
+  terminator cannot be stored. e.g.,
+
+  .. code-block:: c
+
+
+    char buf1[3] = "foo"; // -Wunterminated-string-initialization
+    char buf2[3] = "flarp"; // -Wexcess-initializers
+
+  This diagnostic can be suppressed by adding the new ``nonstring`` attribute
+  to the field or variable being initialized. #GH137705
+- Added ``-Wc++-unterminated-string-initialization``, grouped under
+  ``-Wc++-compat``, which also diagnoses the same cases as
+  ``-Wunterminated-string-initialization``. However, this diagnostic is not
+  silenced by the ``nonstring`` attribute as these initializations are always
+  incompatible with C++.
 
 C2y Feature Support
 ^^^^^^^^^^^^^^^^^^^
diff --git a/clang/include/clang/Basic/Attr.td 
b/clang/include/clang/Basic/Attr.td
index dcdcff8c46fe2..3e426bb685924 100644
--- a/clang/include/clang/Basic/Attr.td
+++ b/clang/include/clang/Basic/Attr.td
@@ -5024,3 +5024,9 @@ def OpenACCRoutineDecl :InheritableAttr {
     void printPrettyPragma(raw_ostream &OS, const PrintingPolicy &Policy) 
const;
   }];
 }
+
+def NonString : InheritableAttr {
+  let Spellings = [GCC<"nonstring">];
+  let Subjects = SubjectList<[Var, Field]>;
+  let Documentation = [NonStringDocs];
+}
diff --git a/clang/include/clang/Basic/AttrDocs.td 
b/clang/include/clang/Basic/AttrDocs.td
index 6e2dbe5655189..9f2edb81df4f8 100644
--- a/clang/include/clang/Basic/AttrDocs.td
+++ b/clang/include/clang/Basic/AttrDocs.td
@@ -9294,3 +9294,18 @@ Declares that a function potentially allocates heap 
memory, and prevents any pot
 of ``nonallocating`` by the compiler.
   }];
 }
+
+def NonStringDocs : Documentation {
+  let Category = DocCatDecl;
+  let Content = [{
+The ``nonstring`` attribute can be applied to the declaration of a variable or
+a field whose type is a character array to specify that the character array is
+not intended to behave like a null-terminated string. This will silence
+diagnostics with code like:
+
+.. code-block:: c
+
+  char BadStr[3] = "foo"; // No space for the null terminator, diagnosed
+  __attribute__((nonstring)) char NotAStr[3] = "foo"; // Not diagnosed
+  }];
+}
\ No newline at end of file
diff --git a/clang/include/clang/Basic/DiagnosticGroups.td 
b/clang/include/clang/Basic/DiagnosticGroups.td
index fc1ce197ef134..75e8fc541305b 100644
--- a/clang/include/clang/Basic/DiagnosticGroups.td
+++ b/clang/include/clang/Basic/DiagnosticGroups.td
@@ -156,6 +156,10 @@ def BuiltinRequiresHeader : 
DiagGroup<"builtin-requires-header">;
 def C99Compat : DiagGroup<"c99-compat">;
 def C23Compat : DiagGroup<"c23-compat">;
 def : DiagGroup<"c2x-compat", [C23Compat]>;
+def InitStringTooLongMissingNonString :
+  DiagGroup<"unterminated-string-initialization">;
+def InitStringTooLongForCpp :
+  DiagGroup<"c++-unterminated-string-initialization">;
 def HiddenCppDecl : DiagGroup<"c++-hidden-decl">;
 def DefaultConstInitUnsafe : DiagGroup<"default-const-init-unsafe">;
 def DefaultConstInit : DiagGroup<"default-const-init", 
[DefaultConstInitUnsafe]>;
@@ -163,7 +167,8 @@ def ImplicitVoidPtrCast : 
DiagGroup<"implicit-void-ptr-cast">;
 def ImplicitIntToEnumCast : DiagGroup<"implicit-int-enum-cast",
                                       [ImplicitEnumEnumCast]>;
 def CXXCompat: DiagGroup<"c++-compat", [ImplicitVoidPtrCast, DefaultConstInit,
-                                        ImplicitIntToEnumCast, HiddenCppDecl]>;
+                                        ImplicitIntToEnumCast, HiddenCppDecl,
+                                        InitStringTooLongForCpp]>;
 
 def ExternCCompat : DiagGroup<"extern-c-compat">;
 def KeywordCompat : DiagGroup<"keyword-compat">;
@@ -1143,6 +1148,7 @@ def Extra : DiagGroup<"extra", [
     StringConcatation,
     FUseLdPath,
     CastFunctionTypeMismatch,
+    InitStringTooLongMissingNonString,
   ]>;
 
 def Most : DiagGroup<"most", [
diff --git a/clang/include/clang/Basic/DiagnosticSemaKinds.td 
b/clang/include/clang/Basic/DiagnosticSemaKinds.td
index ad5bf26be2590..3f1901f6bc7a9 100644
--- a/clang/include/clang/Basic/DiagnosticSemaKinds.td
+++ b/clang/include/clang/Basic/DiagnosticSemaKinds.td
@@ -3804,6 +3804,9 @@ def err_attribute_weakref_without_alias : Error<
   "weakref declaration of %0 must also have an alias attribute">;
 def err_alias_not_supported_on_darwin : Error <
   "aliases are not supported on darwin">;
+def warn_attribute_non_character_array : Warning<
+  "%0%select{ attribute|}1 only applies to fields or variables of character "
+  "array type; type here is %2">, InGroup<IgnoredAttributes>;
 def warn_attribute_wrong_decl_type_str : Warning<
   "%0%select{ attribute|}1 only applies to %2">, InGroup<IgnoredAttributes>;
 def err_attribute_wrong_decl_type_str : Error<
@@ -6404,6 +6407,15 @@ def err_initializer_string_for_char_array_too_long : 
Error<
 def ext_initializer_string_for_char_array_too_long : ExtWarn<
   "initializer-string for char array is too long">,
   InGroup<ExcessInitializers>;
+def warn_initializer_string_for_char_array_too_long_no_nonstring : Warning<
+  "initializer-string for character array is too long, array size is %0 but "
+  "initializer has size %1 (including the null terminating character); did you 
"
+  "mean to use the 'nonstring' attribute?">,
+  InGroup<InitStringTooLongMissingNonString>, DefaultIgnore;
+def warn_initializer_string_for_char_array_too_long_for_cpp : Warning<
+  "initializer-string for character array is too long for C++, array size is "
+  "%0 but initializer has size %1 (including the null terminating character)">,
+  InGroup<InitStringTooLongForCpp>, DefaultIgnore;
 def warn_missing_field_initializers : Warning<
   "missing field %0 initializer">,
   InGroup<MissingFieldInitializers>, DefaultIgnore;
diff --git a/clang/lib/Sema/SemaDeclAttr.cpp b/clang/lib/Sema/SemaDeclAttr.cpp
index 413999b95b998..6726b48688838 100644
--- a/clang/lib/Sema/SemaDeclAttr.cpp
+++ b/clang/lib/Sema/SemaDeclAttr.cpp
@@ -4907,6 +4907,20 @@ void Sema::AddModeAttr(Decl *D, const 
AttributeCommonInfo &CI,
   D->addAttr(::new (Context) ModeAttr(Context, CI, Name));
 }
 
+static void handleNonStringAttr(Sema &S, Decl *D, const ParsedAttr &AL) {
+  // This only applies to fields and variable declarations which have an array
+  // type.
+  QualType QT = cast<ValueDecl>(D)->getType();
+  if (!QT->isArrayType() ||
+      !QT->getBaseElementTypeUnsafe()->isAnyCharacterType()) {
+    S.Diag(D->getBeginLoc(), diag::warn_attribute_non_character_array)
+        << AL << AL.isRegularKeywordAttribute() << QT << AL.getRange();
+    return;
+  }
+
+  D->addAttr(::new (S.Context) NonStringAttr(S.Context, AL));
+}
+
 static void handleNoDebugAttr(Sema &S, Decl *D, const ParsedAttr &AL) {
   D->addAttr(::new (S.Context) NoDebugAttr(S.Context, AL));
 }
@@ -7144,6 +7158,9 @@ ProcessDeclAttribute(Sema &S, Scope *scope, Decl *D, 
const ParsedAttr &AL,
   case ParsedAttr::AT_Mode:
     handleModeAttr(S, D, AL);
     break;
+  case ParsedAttr::AT_NonString:
+    handleNonStringAttr(S, D, AL);
+    break;
   case ParsedAttr::AT_NonNull:
     if (auto *PVD = dyn_cast<ParmVarDecl>(D))
       handleNonNullAttrParameter(S, PVD, AL);
diff --git a/clang/lib/Sema/SemaInit.cpp b/clang/lib/Sema/SemaInit.cpp
index a85eca4d4ac3c..34766fd87af33 100644
--- a/clang/lib/Sema/SemaInit.cpp
+++ b/clang/lib/Sema/SemaInit.cpp
@@ -210,7 +210,8 @@ static void CheckC23ConstexprInitStringLiteral(const 
StringLiteral *SE,
                                                Sema &SemaRef, QualType &TT);
 
 static void CheckStringInit(Expr *Str, QualType &DeclT, const ArrayType *AT,
-                            Sema &S, bool CheckC23ConstexprInit = false) {
+                            Sema &S, const InitializedEntity &Entity,
+                            bool CheckC23ConstexprInit = false) {
   // Get the length of the string as parsed.
   auto *ConstantArrayTy =
       cast<ConstantArrayType>(Str->getType()->getAsArrayTypeUnsafe());
@@ -232,6 +233,7 @@ static void CheckStringInit(Expr *Str, QualType &DeclT, 
const ArrayType *AT,
   }
 
   const ConstantArrayType *CAT = cast<ConstantArrayType>(AT);
+  uint64_t ArrayLen = CAT->getZExtSize();
 
   // We have an array of character type with known size.  However,
   // the size may be smaller or larger than the string we are initializing.
@@ -247,16 +249,31 @@ static void CheckStringInit(Expr *Str, QualType &DeclT, 
const ArrayType *AT,
     }
 
     // [dcl.init.string]p2
-    if (StrLength > CAT->getZExtSize())
+    if (StrLength > ArrayLen)
       S.Diag(Str->getBeginLoc(),
              diag::err_initializer_string_for_char_array_too_long)
-          << CAT->getZExtSize() << StrLength << Str->getSourceRange();
+          << ArrayLen << StrLength << Str->getSourceRange();
   } else {
     // C99 6.7.8p14.
-    if (StrLength - 1 > CAT->getZExtSize())
+    if (StrLength - 1 > ArrayLen)
       S.Diag(Str->getBeginLoc(),
              diag::ext_initializer_string_for_char_array_too_long)
           << Str->getSourceRange();
+    else if (StrLength - 1 == ArrayLen) {
+      // If the entity being initialized has the nonstring attribute, then
+      // silence the "missing nonstring" diagnostic.
+      if (const ValueDecl *D = Entity.getDecl();
+          !D || !D->hasAttr<NonStringAttr>())
+        S.Diag(
+            Str->getBeginLoc(),
+            diag::warn_initializer_string_for_char_array_too_long_no_nonstring)
+            << ArrayLen << StrLength << Str->getSourceRange();
+
+      // Always emit the C++ compatibility diagnostic.
+      S.Diag(Str->getBeginLoc(),
+             diag::warn_initializer_string_for_char_array_too_long_for_cpp)
+          << ArrayLen << StrLength << Str->getSourceRange();
+    }
   }
 
   // Set the type to the actual size that we are initializing.  If we have
@@ -1579,7 +1596,7 @@ void InitListChecker::CheckSubElementType(const 
InitializedEntity &Entity,
     if (IsStringInit(expr, arrayType, SemaRef.Context) == SIF_None) {
       // FIXME: Should we do this checking in verify-only mode?
       if (!VerifyOnly)
-        CheckStringInit(expr, ElemType, arrayType, SemaRef,
+        CheckStringInit(expr, ElemType, arrayType, SemaRef, Entity,
                         SemaRef.getLangOpts().C23 &&
                             initializingConstexprVariable(Entity));
       if (StructuredList)
@@ -2089,9 +2106,9 @@ void InitListChecker::CheckArrayType(const 
InitializedEntity &Entity,
       // constant for each string.
       // FIXME: Should we do these checks in verify-only mode too?
       if (!VerifyOnly)
-        CheckStringInit(IList->getInit(Index), DeclType, arrayType, SemaRef,
-                        SemaRef.getLangOpts().C23 &&
-                            initializingConstexprVariable(Entity));
+        CheckStringInit(
+            IList->getInit(Index), DeclType, arrayType, SemaRef, Entity,
+            SemaRef.getLangOpts().C23 && 
initializingConstexprVariable(Entity));
       if (StructuredList) {
         UpdateStructuredListElement(StructuredList, StructuredIndex,
                                     IList->getInit(Index));
@@ -8405,7 +8422,7 @@ ExprResult InitializationSequence::Perform(Sema &S,
       QualType Ty = Step->Type;
       bool UpdateType = ResultType && 
Entity.getType()->isIncompleteArrayType();
       CheckStringInit(CurInit.get(), UpdateType ? *ResultType : Ty,
-                      S.Context.getAsArrayType(Ty), S,
+                      S.Context.getAsArrayType(Ty), S, Entity,
                       S.getLangOpts().C23 &&
                           initializingConstexprVariable(Entity));
       break;
diff --git a/clang/test/Sema/attr-nonstring.c b/clang/test/Sema/attr-nonstring.c
new file mode 100644
index 0000000000000..81c6c7a318f88
--- /dev/null
+++ b/clang/test/Sema/attr-nonstring.c
@@ -0,0 +1,41 @@
+// RUN: %clang_cc1 -fsyntax-only -verify=noncxx,expected,no-nonstring 
-Wunterminated-string-initialization %s
+// RUN: %clang_cc1 -fsyntax-only -verify=noncxx,expected,no-nonstring -Wextra 
%s
+// RUN: %clang_cc1 -fsyntax-only -verify=noncxx,expected,compat 
-Wc++-unterminated-string-initialization %s
+// RUN: %clang_cc1 -fsyntax-only -verify=noncxx,expected,compat -Wc++-compat %s
+// RUN: %clang_cc1 -fsyntax-only -verify=expected,cxx -x c++ %s
+// RUN: %clang_cc1 -fsyntax-only -verify=expected,noncxx 
-Wno-unterminated-string-initialization %s
+// RUN: %clang_cc1 -fsyntax-only -verify=expected,noncxx -Wc++-compat -Wextra 
-Wno-unterminated-string-initialization 
-Wno-c++-unterminated-string-initialization %s
+
+char foo[3] = "foo"; // no-nonstring-warning {{initializer-string for 
character array is too long, array size is 3 but initializer has size 4 
(including the null terminating character); did you mean to use the 'nonstring' 
attribute?}} \
+                        compat-warning {{initializer-string for character 
array is too long for C++, array size is 3 but initializer has size 4 
(including the null terminating character)}} \
+                        cxx-error {{initializer-string for char array is too 
long, array size is 3 but initializer has size 4 (including the null 
terminating character)}}
+__attribute__((nonstring)) char bar[3] = "bar"; // compat-warning 
{{initializer-string for character array is too long for C++, array size is 3 
but initializer has size 4 (including the null terminating character)}} \
+                                                   cxx-error 
{{initializer-string for char array is too long, array size is 3 but 
initializer has size 4 (including the null terminating character)}}
+
+struct S {
+  char buf[3];
+  __attribute__((nonstring)) char fub[3];
+} s = { "baz", "bop" }; // no-nonstring-warning {{initializer-string for 
character array is too long, array size is 3 but initializer has size 4 
(including the null terminating character); did you mean to use the 'nonstring' 
attribute?}} \
+                           compat-warning 2 {{initializer-string for character 
array is too long for C++, array size is 3 but initializer has size 4 
(including the null terminating character)}} \
+                           cxx-error 2 {{initializer-string for char array is 
too long, array size is 3 but initializer has size 4 (including the null 
terminating character)}}
+
+// Show that we also issue the diagnostic for the other character types as 
well.
+signed char scfoo[3] = "foo";    // no-nonstring-warning {{initializer-string 
for character array is too long, array size is 3 but initializer has size 4 
(including the null terminating character); did you mean to use the 'nonstring' 
attribute?}} \
+                                    compat-warning {{initializer-string for 
character array is too long for C++, array size is 3 but initializer has size 4 
(including the null terminating character)}} \
+                                    cxx-error {{initializer-string for char 
array is too long, array size is 3 but initializer has size 4 (including the 
null terminating character)}}
+unsigned char ucfoo[3] = "foo";  // no-nonstring-warning {{initializer-string 
for character array is too long, array size is 3 but initializer has size 4 
(including the null terminating character); did you mean to use the 'nonstring' 
attribute?}} \
+                                    compat-warning {{initializer-string for 
character array is too long for C++, array size is 3 but initializer has size 4 
(including the null terminating character)}} \
+                                    cxx-error {{initializer-string for char 
array is too long, array size is 3 but initializer has size 4 (including the 
null terminating character)}}
+
+// wchar_t (et al) are a bit funny in that it cannot do string init in C++ at
+// all, so they are not tested.
+
+// Show that we properly diagnose incorrect uses of nonstring.
+__attribute__((nonstring)) void func(void);       // expected-warning 
{{'nonstring' attribute only applies to variables and non-static data members}}
+__attribute__((nonstring("test"))) char eek1[2];  // expected-error 
{{'nonstring' attribute takes no arguments}}
+__attribute__((nonstring)) int eek2;              // expected-warning 
{{'nonstring' attribute only applies to fields or variables of character array 
type; type here is 'int'}}
+
+// Note, these diagnostics are separate from the "too many initializers"
+// diagnostic when you overwrite more than just the null terminator.
+char too_big[3] = "this is too big"; // noncxx-warning {{initializer-string 
for char array is too long}} \
+                                        cxx-error {{initializer-string for 
char array is too long, array size is 3 but initializer has size 16 (including 
the null terminating character)}}

>From f2e673105574a80184c1cf55cd80c27126ac645c Mon Sep 17 00:00:00 2001
From: Aaron Ballman <aa...@aaronballman.com>
Date: Tue, 29 Apr 2025 12:02:13 -0400
Subject: [PATCH 2/3] New and improved with a newline

---
 clang/include/clang/Basic/AttrDocs.td | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/clang/include/clang/Basic/AttrDocs.td 
b/clang/include/clang/Basic/AttrDocs.td
index 9f2edb81df4f8..cbb397cb31dfb 100644
--- a/clang/include/clang/Basic/AttrDocs.td
+++ b/clang/include/clang/Basic/AttrDocs.td
@@ -9308,4 +9308,4 @@ diagnostics with code like:
   char BadStr[3] = "foo"; // No space for the null terminator, diagnosed
   __attribute__((nonstring)) char NotAStr[3] = "foo"; // Not diagnosed
   }];
-}
\ No newline at end of file
+}

>From 0bc8d0a02e20fbbc90813a925aa3fb9b9a1bc0c4 Mon Sep 17 00:00:00 2001
From: Aaron Ballman <aa...@aaronballman.com>
Date: Tue, 29 Apr 2025 12:03:16 -0400
Subject: [PATCH 3/3] Updated diagnostic wording

---
 clang/include/clang/Basic/DiagnosticSemaKinds.td | 2 +-
 clang/test/Sema/attr-nonstring.c                 | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/clang/include/clang/Basic/DiagnosticSemaKinds.td 
b/clang/include/clang/Basic/DiagnosticSemaKinds.td
index 3f1901f6bc7a9..823d4bb687497 100644
--- a/clang/include/clang/Basic/DiagnosticSemaKinds.td
+++ b/clang/include/clang/Basic/DiagnosticSemaKinds.td
@@ -3806,7 +3806,7 @@ def err_alias_not_supported_on_darwin : Error <
   "aliases are not supported on darwin">;
 def warn_attribute_non_character_array : Warning<
   "%0%select{ attribute|}1 only applies to fields or variables of character "
-  "array type; type here is %2">, InGroup<IgnoredAttributes>;
+  "array type; type is %2">, InGroup<IgnoredAttributes>;
 def warn_attribute_wrong_decl_type_str : Warning<
   "%0%select{ attribute|}1 only applies to %2">, InGroup<IgnoredAttributes>;
 def err_attribute_wrong_decl_type_str : Error<
diff --git a/clang/test/Sema/attr-nonstring.c b/clang/test/Sema/attr-nonstring.c
index 81c6c7a318f88..31728e701a3fc 100644
--- a/clang/test/Sema/attr-nonstring.c
+++ b/clang/test/Sema/attr-nonstring.c
@@ -33,7 +33,7 @@ unsigned char ucfoo[3] = "foo";  // no-nonstring-warning 
{{initializer-string fo
 // Show that we properly diagnose incorrect uses of nonstring.
 __attribute__((nonstring)) void func(void);       // expected-warning 
{{'nonstring' attribute only applies to variables and non-static data members}}
 __attribute__((nonstring("test"))) char eek1[2];  // expected-error 
{{'nonstring' attribute takes no arguments}}
-__attribute__((nonstring)) int eek2;              // expected-warning 
{{'nonstring' attribute only applies to fields or variables of character array 
type; type here is 'int'}}
+__attribute__((nonstring)) int eek2;              // expected-warning 
{{'nonstring' attribute only applies to fields or variables of character array 
type; type is 'int'}}
 
 // Note, these diagnostics are separate from the "too many initializers"
 // diagnostic when you overwrite more than just the null terminator.

_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to