https://github.com/AaronBallman updated https://github.com/llvm/llvm-project/pull/137829
>From 7ed11d01dfda559f16ee823d6274ada321621466 Mon Sep 17 00:00:00 2001 From: Aaron Ballman <aa...@aaronballman.com> Date: Tue, 29 Apr 2025 11:43:53 -0400 Subject: [PATCH 1/3] [C] Add diagnostic + attr for unterminated strings This introduces three things related to intialization like: char buf[3] = "foo"; where the array does not declare enough space for the null terminator but otherwise can represent the array contents exactly. 1) An attribute named 'nonstring' which can be used to mark that a field or variable is not intended to hold string data. 2) -Wunterminated-string-initialization, which is grouped under -Wextra, and diagnoses the above construct unless the declaration uses the 'nonstring' attribute. 3) -Wc++-unterminated-string-initialization, which is grouped under -Wc++-compat, and diagnoses the above construct even if the declaration uses the 'nonstring' attribute. Fixes #137705 --- clang/docs/ReleaseNotes.rst | 17 ++++++++ clang/include/clang/Basic/Attr.td | 6 +++ clang/include/clang/Basic/AttrDocs.td | 15 +++++++ clang/include/clang/Basic/DiagnosticGroups.td | 8 +++- .../clang/Basic/DiagnosticSemaKinds.td | 12 ++++++ clang/lib/Sema/SemaDeclAttr.cpp | 17 ++++++++ clang/lib/Sema/SemaInit.cpp | 35 ++++++++++++---- clang/test/Sema/attr-nonstring.c | 41 +++++++++++++++++++ 8 files changed, 141 insertions(+), 10 deletions(-) create mode 100644 clang/test/Sema/attr-nonstring.c diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst index bc68bb8b70b3d..1adb59a970329 100644 --- a/clang/docs/ReleaseNotes.rst +++ b/clang/docs/ReleaseNotes.rst @@ -171,6 +171,23 @@ C Language Changes ``-Wenum-conversion`` and ``-Wimplicit-int-enum-cast``. This conversion is an int-to-enum conversion because the enumeration on the right-hand side is promoted to ``int`` before the assignment. +- Added ``-Wunterminated-string-initialization``, grouped under ``-Wextra``, + which diagnoses an initialization from a string literal where only the null + terminator cannot be stored. e.g., + + .. code-block:: c + + + char buf1[3] = "foo"; // -Wunterminated-string-initialization + char buf2[3] = "flarp"; // -Wexcess-initializers + + This diagnostic can be suppressed by adding the new ``nonstring`` attribute + to the field or variable being initialized. #GH137705 +- Added ``-Wc++-unterminated-string-initialization``, grouped under + ``-Wc++-compat``, which also diagnoses the same cases as + ``-Wunterminated-string-initialization``. However, this diagnostic is not + silenced by the ``nonstring`` attribute as these initializations are always + incompatible with C++. C2y Feature Support ^^^^^^^^^^^^^^^^^^^ diff --git a/clang/include/clang/Basic/Attr.td b/clang/include/clang/Basic/Attr.td index dcdcff8c46fe2..3e426bb685924 100644 --- a/clang/include/clang/Basic/Attr.td +++ b/clang/include/clang/Basic/Attr.td @@ -5024,3 +5024,9 @@ def OpenACCRoutineDecl :InheritableAttr { void printPrettyPragma(raw_ostream &OS, const PrintingPolicy &Policy) const; }]; } + +def NonString : InheritableAttr { + let Spellings = [GCC<"nonstring">]; + let Subjects = SubjectList<[Var, Field]>; + let Documentation = [NonStringDocs]; +} diff --git a/clang/include/clang/Basic/AttrDocs.td b/clang/include/clang/Basic/AttrDocs.td index 6e2dbe5655189..9f2edb81df4f8 100644 --- a/clang/include/clang/Basic/AttrDocs.td +++ b/clang/include/clang/Basic/AttrDocs.td @@ -9294,3 +9294,18 @@ Declares that a function potentially allocates heap memory, and prevents any pot of ``nonallocating`` by the compiler. }]; } + +def NonStringDocs : Documentation { + let Category = DocCatDecl; + let Content = [{ +The ``nonstring`` attribute can be applied to the declaration of a variable or +a field whose type is a character array to specify that the character array is +not intended to behave like a null-terminated string. This will silence +diagnostics with code like: + +.. code-block:: c + + char BadStr[3] = "foo"; // No space for the null terminator, diagnosed + __attribute__((nonstring)) char NotAStr[3] = "foo"; // Not diagnosed + }]; +} \ No newline at end of file diff --git a/clang/include/clang/Basic/DiagnosticGroups.td b/clang/include/clang/Basic/DiagnosticGroups.td index fc1ce197ef134..75e8fc541305b 100644 --- a/clang/include/clang/Basic/DiagnosticGroups.td +++ b/clang/include/clang/Basic/DiagnosticGroups.td @@ -156,6 +156,10 @@ def BuiltinRequiresHeader : DiagGroup<"builtin-requires-header">; def C99Compat : DiagGroup<"c99-compat">; def C23Compat : DiagGroup<"c23-compat">; def : DiagGroup<"c2x-compat", [C23Compat]>; +def InitStringTooLongMissingNonString : + DiagGroup<"unterminated-string-initialization">; +def InitStringTooLongForCpp : + DiagGroup<"c++-unterminated-string-initialization">; def HiddenCppDecl : DiagGroup<"c++-hidden-decl">; def DefaultConstInitUnsafe : DiagGroup<"default-const-init-unsafe">; def DefaultConstInit : DiagGroup<"default-const-init", [DefaultConstInitUnsafe]>; @@ -163,7 +167,8 @@ def ImplicitVoidPtrCast : DiagGroup<"implicit-void-ptr-cast">; def ImplicitIntToEnumCast : DiagGroup<"implicit-int-enum-cast", [ImplicitEnumEnumCast]>; def CXXCompat: DiagGroup<"c++-compat", [ImplicitVoidPtrCast, DefaultConstInit, - ImplicitIntToEnumCast, HiddenCppDecl]>; + ImplicitIntToEnumCast, HiddenCppDecl, + InitStringTooLongForCpp]>; def ExternCCompat : DiagGroup<"extern-c-compat">; def KeywordCompat : DiagGroup<"keyword-compat">; @@ -1143,6 +1148,7 @@ def Extra : DiagGroup<"extra", [ StringConcatation, FUseLdPath, CastFunctionTypeMismatch, + InitStringTooLongMissingNonString, ]>; def Most : DiagGroup<"most", [ diff --git a/clang/include/clang/Basic/DiagnosticSemaKinds.td b/clang/include/clang/Basic/DiagnosticSemaKinds.td index ad5bf26be2590..3f1901f6bc7a9 100644 --- a/clang/include/clang/Basic/DiagnosticSemaKinds.td +++ b/clang/include/clang/Basic/DiagnosticSemaKinds.td @@ -3804,6 +3804,9 @@ def err_attribute_weakref_without_alias : Error< "weakref declaration of %0 must also have an alias attribute">; def err_alias_not_supported_on_darwin : Error < "aliases are not supported on darwin">; +def warn_attribute_non_character_array : Warning< + "%0%select{ attribute|}1 only applies to fields or variables of character " + "array type; type here is %2">, InGroup<IgnoredAttributes>; def warn_attribute_wrong_decl_type_str : Warning< "%0%select{ attribute|}1 only applies to %2">, InGroup<IgnoredAttributes>; def err_attribute_wrong_decl_type_str : Error< @@ -6404,6 +6407,15 @@ def err_initializer_string_for_char_array_too_long : Error< def ext_initializer_string_for_char_array_too_long : ExtWarn< "initializer-string for char array is too long">, InGroup<ExcessInitializers>; +def warn_initializer_string_for_char_array_too_long_no_nonstring : Warning< + "initializer-string for character array is too long, array size is %0 but " + "initializer has size %1 (including the null terminating character); did you " + "mean to use the 'nonstring' attribute?">, + InGroup<InitStringTooLongMissingNonString>, DefaultIgnore; +def warn_initializer_string_for_char_array_too_long_for_cpp : Warning< + "initializer-string for character array is too long for C++, array size is " + "%0 but initializer has size %1 (including the null terminating character)">, + InGroup<InitStringTooLongForCpp>, DefaultIgnore; def warn_missing_field_initializers : Warning< "missing field %0 initializer">, InGroup<MissingFieldInitializers>, DefaultIgnore; diff --git a/clang/lib/Sema/SemaDeclAttr.cpp b/clang/lib/Sema/SemaDeclAttr.cpp index 413999b95b998..6726b48688838 100644 --- a/clang/lib/Sema/SemaDeclAttr.cpp +++ b/clang/lib/Sema/SemaDeclAttr.cpp @@ -4907,6 +4907,20 @@ void Sema::AddModeAttr(Decl *D, const AttributeCommonInfo &CI, D->addAttr(::new (Context) ModeAttr(Context, CI, Name)); } +static void handleNonStringAttr(Sema &S, Decl *D, const ParsedAttr &AL) { + // This only applies to fields and variable declarations which have an array + // type. + QualType QT = cast<ValueDecl>(D)->getType(); + if (!QT->isArrayType() || + !QT->getBaseElementTypeUnsafe()->isAnyCharacterType()) { + S.Diag(D->getBeginLoc(), diag::warn_attribute_non_character_array) + << AL << AL.isRegularKeywordAttribute() << QT << AL.getRange(); + return; + } + + D->addAttr(::new (S.Context) NonStringAttr(S.Context, AL)); +} + static void handleNoDebugAttr(Sema &S, Decl *D, const ParsedAttr &AL) { D->addAttr(::new (S.Context) NoDebugAttr(S.Context, AL)); } @@ -7144,6 +7158,9 @@ ProcessDeclAttribute(Sema &S, Scope *scope, Decl *D, const ParsedAttr &AL, case ParsedAttr::AT_Mode: handleModeAttr(S, D, AL); break; + case ParsedAttr::AT_NonString: + handleNonStringAttr(S, D, AL); + break; case ParsedAttr::AT_NonNull: if (auto *PVD = dyn_cast<ParmVarDecl>(D)) handleNonNullAttrParameter(S, PVD, AL); diff --git a/clang/lib/Sema/SemaInit.cpp b/clang/lib/Sema/SemaInit.cpp index a85eca4d4ac3c..34766fd87af33 100644 --- a/clang/lib/Sema/SemaInit.cpp +++ b/clang/lib/Sema/SemaInit.cpp @@ -210,7 +210,8 @@ static void CheckC23ConstexprInitStringLiteral(const StringLiteral *SE, Sema &SemaRef, QualType &TT); static void CheckStringInit(Expr *Str, QualType &DeclT, const ArrayType *AT, - Sema &S, bool CheckC23ConstexprInit = false) { + Sema &S, const InitializedEntity &Entity, + bool CheckC23ConstexprInit = false) { // Get the length of the string as parsed. auto *ConstantArrayTy = cast<ConstantArrayType>(Str->getType()->getAsArrayTypeUnsafe()); @@ -232,6 +233,7 @@ static void CheckStringInit(Expr *Str, QualType &DeclT, const ArrayType *AT, } const ConstantArrayType *CAT = cast<ConstantArrayType>(AT); + uint64_t ArrayLen = CAT->getZExtSize(); // We have an array of character type with known size. However, // the size may be smaller or larger than the string we are initializing. @@ -247,16 +249,31 @@ static void CheckStringInit(Expr *Str, QualType &DeclT, const ArrayType *AT, } // [dcl.init.string]p2 - if (StrLength > CAT->getZExtSize()) + if (StrLength > ArrayLen) S.Diag(Str->getBeginLoc(), diag::err_initializer_string_for_char_array_too_long) - << CAT->getZExtSize() << StrLength << Str->getSourceRange(); + << ArrayLen << StrLength << Str->getSourceRange(); } else { // C99 6.7.8p14. - if (StrLength - 1 > CAT->getZExtSize()) + if (StrLength - 1 > ArrayLen) S.Diag(Str->getBeginLoc(), diag::ext_initializer_string_for_char_array_too_long) << Str->getSourceRange(); + else if (StrLength - 1 == ArrayLen) { + // If the entity being initialized has the nonstring attribute, then + // silence the "missing nonstring" diagnostic. + if (const ValueDecl *D = Entity.getDecl(); + !D || !D->hasAttr<NonStringAttr>()) + S.Diag( + Str->getBeginLoc(), + diag::warn_initializer_string_for_char_array_too_long_no_nonstring) + << ArrayLen << StrLength << Str->getSourceRange(); + + // Always emit the C++ compatibility diagnostic. + S.Diag(Str->getBeginLoc(), + diag::warn_initializer_string_for_char_array_too_long_for_cpp) + << ArrayLen << StrLength << Str->getSourceRange(); + } } // Set the type to the actual size that we are initializing. If we have @@ -1579,7 +1596,7 @@ void InitListChecker::CheckSubElementType(const InitializedEntity &Entity, if (IsStringInit(expr, arrayType, SemaRef.Context) == SIF_None) { // FIXME: Should we do this checking in verify-only mode? if (!VerifyOnly) - CheckStringInit(expr, ElemType, arrayType, SemaRef, + CheckStringInit(expr, ElemType, arrayType, SemaRef, Entity, SemaRef.getLangOpts().C23 && initializingConstexprVariable(Entity)); if (StructuredList) @@ -2089,9 +2106,9 @@ void InitListChecker::CheckArrayType(const InitializedEntity &Entity, // constant for each string. // FIXME: Should we do these checks in verify-only mode too? if (!VerifyOnly) - CheckStringInit(IList->getInit(Index), DeclType, arrayType, SemaRef, - SemaRef.getLangOpts().C23 && - initializingConstexprVariable(Entity)); + CheckStringInit( + IList->getInit(Index), DeclType, arrayType, SemaRef, Entity, + SemaRef.getLangOpts().C23 && initializingConstexprVariable(Entity)); if (StructuredList) { UpdateStructuredListElement(StructuredList, StructuredIndex, IList->getInit(Index)); @@ -8405,7 +8422,7 @@ ExprResult InitializationSequence::Perform(Sema &S, QualType Ty = Step->Type; bool UpdateType = ResultType && Entity.getType()->isIncompleteArrayType(); CheckStringInit(CurInit.get(), UpdateType ? *ResultType : Ty, - S.Context.getAsArrayType(Ty), S, + S.Context.getAsArrayType(Ty), S, Entity, S.getLangOpts().C23 && initializingConstexprVariable(Entity)); break; diff --git a/clang/test/Sema/attr-nonstring.c b/clang/test/Sema/attr-nonstring.c new file mode 100644 index 0000000000000..81c6c7a318f88 --- /dev/null +++ b/clang/test/Sema/attr-nonstring.c @@ -0,0 +1,41 @@ +// RUN: %clang_cc1 -fsyntax-only -verify=noncxx,expected,no-nonstring -Wunterminated-string-initialization %s +// RUN: %clang_cc1 -fsyntax-only -verify=noncxx,expected,no-nonstring -Wextra %s +// RUN: %clang_cc1 -fsyntax-only -verify=noncxx,expected,compat -Wc++-unterminated-string-initialization %s +// RUN: %clang_cc1 -fsyntax-only -verify=noncxx,expected,compat -Wc++-compat %s +// RUN: %clang_cc1 -fsyntax-only -verify=expected,cxx -x c++ %s +// RUN: %clang_cc1 -fsyntax-only -verify=expected,noncxx -Wno-unterminated-string-initialization %s +// RUN: %clang_cc1 -fsyntax-only -verify=expected,noncxx -Wc++-compat -Wextra -Wno-unterminated-string-initialization -Wno-c++-unterminated-string-initialization %s + +char foo[3] = "foo"; // no-nonstring-warning {{initializer-string for character array is too long, array size is 3 but initializer has size 4 (including the null terminating character); did you mean to use the 'nonstring' attribute?}} \ + compat-warning {{initializer-string for character array is too long for C++, array size is 3 but initializer has size 4 (including the null terminating character)}} \ + cxx-error {{initializer-string for char array is too long, array size is 3 but initializer has size 4 (including the null terminating character)}} +__attribute__((nonstring)) char bar[3] = "bar"; // compat-warning {{initializer-string for character array is too long for C++, array size is 3 but initializer has size 4 (including the null terminating character)}} \ + cxx-error {{initializer-string for char array is too long, array size is 3 but initializer has size 4 (including the null terminating character)}} + +struct S { + char buf[3]; + __attribute__((nonstring)) char fub[3]; +} s = { "baz", "bop" }; // no-nonstring-warning {{initializer-string for character array is too long, array size is 3 but initializer has size 4 (including the null terminating character); did you mean to use the 'nonstring' attribute?}} \ + compat-warning 2 {{initializer-string for character array is too long for C++, array size is 3 but initializer has size 4 (including the null terminating character)}} \ + cxx-error 2 {{initializer-string for char array is too long, array size is 3 but initializer has size 4 (including the null terminating character)}} + +// Show that we also issue the diagnostic for the other character types as well. +signed char scfoo[3] = "foo"; // no-nonstring-warning {{initializer-string for character array is too long, array size is 3 but initializer has size 4 (including the null terminating character); did you mean to use the 'nonstring' attribute?}} \ + compat-warning {{initializer-string for character array is too long for C++, array size is 3 but initializer has size 4 (including the null terminating character)}} \ + cxx-error {{initializer-string for char array is too long, array size is 3 but initializer has size 4 (including the null terminating character)}} +unsigned char ucfoo[3] = "foo"; // no-nonstring-warning {{initializer-string for character array is too long, array size is 3 but initializer has size 4 (including the null terminating character); did you mean to use the 'nonstring' attribute?}} \ + compat-warning {{initializer-string for character array is too long for C++, array size is 3 but initializer has size 4 (including the null terminating character)}} \ + cxx-error {{initializer-string for char array is too long, array size is 3 but initializer has size 4 (including the null terminating character)}} + +// wchar_t (et al) are a bit funny in that it cannot do string init in C++ at +// all, so they are not tested. + +// Show that we properly diagnose incorrect uses of nonstring. +__attribute__((nonstring)) void func(void); // expected-warning {{'nonstring' attribute only applies to variables and non-static data members}} +__attribute__((nonstring("test"))) char eek1[2]; // expected-error {{'nonstring' attribute takes no arguments}} +__attribute__((nonstring)) int eek2; // expected-warning {{'nonstring' attribute only applies to fields or variables of character array type; type here is 'int'}} + +// Note, these diagnostics are separate from the "too many initializers" +// diagnostic when you overwrite more than just the null terminator. +char too_big[3] = "this is too big"; // noncxx-warning {{initializer-string for char array is too long}} \ + cxx-error {{initializer-string for char array is too long, array size is 3 but initializer has size 16 (including the null terminating character)}} >From f2e673105574a80184c1cf55cd80c27126ac645c Mon Sep 17 00:00:00 2001 From: Aaron Ballman <aa...@aaronballman.com> Date: Tue, 29 Apr 2025 12:02:13 -0400 Subject: [PATCH 2/3] New and improved with a newline --- clang/include/clang/Basic/AttrDocs.td | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/clang/include/clang/Basic/AttrDocs.td b/clang/include/clang/Basic/AttrDocs.td index 9f2edb81df4f8..cbb397cb31dfb 100644 --- a/clang/include/clang/Basic/AttrDocs.td +++ b/clang/include/clang/Basic/AttrDocs.td @@ -9308,4 +9308,4 @@ diagnostics with code like: char BadStr[3] = "foo"; // No space for the null terminator, diagnosed __attribute__((nonstring)) char NotAStr[3] = "foo"; // Not diagnosed }]; -} \ No newline at end of file +} >From 0bc8d0a02e20fbbc90813a925aa3fb9b9a1bc0c4 Mon Sep 17 00:00:00 2001 From: Aaron Ballman <aa...@aaronballman.com> Date: Tue, 29 Apr 2025 12:03:16 -0400 Subject: [PATCH 3/3] Updated diagnostic wording --- clang/include/clang/Basic/DiagnosticSemaKinds.td | 2 +- clang/test/Sema/attr-nonstring.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/clang/include/clang/Basic/DiagnosticSemaKinds.td b/clang/include/clang/Basic/DiagnosticSemaKinds.td index 3f1901f6bc7a9..823d4bb687497 100644 --- a/clang/include/clang/Basic/DiagnosticSemaKinds.td +++ b/clang/include/clang/Basic/DiagnosticSemaKinds.td @@ -3806,7 +3806,7 @@ def err_alias_not_supported_on_darwin : Error < "aliases are not supported on darwin">; def warn_attribute_non_character_array : Warning< "%0%select{ attribute|}1 only applies to fields or variables of character " - "array type; type here is %2">, InGroup<IgnoredAttributes>; + "array type; type is %2">, InGroup<IgnoredAttributes>; def warn_attribute_wrong_decl_type_str : Warning< "%0%select{ attribute|}1 only applies to %2">, InGroup<IgnoredAttributes>; def err_attribute_wrong_decl_type_str : Error< diff --git a/clang/test/Sema/attr-nonstring.c b/clang/test/Sema/attr-nonstring.c index 81c6c7a318f88..31728e701a3fc 100644 --- a/clang/test/Sema/attr-nonstring.c +++ b/clang/test/Sema/attr-nonstring.c @@ -33,7 +33,7 @@ unsigned char ucfoo[3] = "foo"; // no-nonstring-warning {{initializer-string fo // Show that we properly diagnose incorrect uses of nonstring. __attribute__((nonstring)) void func(void); // expected-warning {{'nonstring' attribute only applies to variables and non-static data members}} __attribute__((nonstring("test"))) char eek1[2]; // expected-error {{'nonstring' attribute takes no arguments}} -__attribute__((nonstring)) int eek2; // expected-warning {{'nonstring' attribute only applies to fields or variables of character array type; type here is 'int'}} +__attribute__((nonstring)) int eek2; // expected-warning {{'nonstring' attribute only applies to fields or variables of character array type; type is 'int'}} // Note, these diagnostics are separate from the "too many initializers" // diagnostic when you overwrite more than just the null terminator. _______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits