llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT--> @llvm/pr-subscribers-clang Author: None (apple-fcloutier) <details> <summary>Changes</summary> This implements ``__attribute__((format_matches))``, as described in the RFC: https://discourse.llvm.org/t/rfc-format-attribute-attribute-format-like/83076 The ``format`` attribute only allows the compiler to check that a format string matches its arguments. If the format string is passed independently of its arguments, there is no way to have the compiler check it. ``format_matches(flavor, fmtidx, example)`` allows the compiler to check format strings against the ``example`` format string instead of against format arguments. See the changes to AttrDocs.td in this diff for more information. Implementation-wise, this change subclasses CheckPrintfHandler and CheckScanfHandler to allow them to collect specifiers into arrays, and implements comparing that two specifiers are equivalent. `checkFormatStringExpr` gets a new `ReferenceFormatString` argument that is piped down when calling a function with the `format_matches` attribute (and is `nullptr` otherwise); this is the string that the actual format string is compared against. Although this change does not enable -Wformat-nonliteral by default, IMO, all the pieces are now in place such that it could be. --- Patch is 80.51 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/116708.diff 12 Files Affected: - (modified) clang/docs/ReleaseNotes.rst (+44) - (modified) clang/include/clang/AST/FormatString.h (+2-1) - (modified) clang/include/clang/Basic/Attr.td (+10) - (modified) clang/include/clang/Basic/AttrDocs.td (+97) - (modified) clang/include/clang/Basic/DiagnosticSemaKinds.td (+21) - (modified) clang/include/clang/Sema/Sema.h (+27-10) - (modified) clang/lib/AST/AttrImpl.cpp (+5) - (modified) clang/lib/AST/FormatString.cpp (+91) - (modified) clang/lib/Sema/SemaChecking.cpp (+607-147) - (modified) clang/lib/Sema/SemaDeclAttr.cpp (+93-27) - (modified) clang/lib/Sema/SemaObjC.cpp (+2-1) - (added) clang/test/Sema/format-string-matches.c (+122) ``````````diff diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst index 0efe62f1804cd0a..66e4758fe89e243 100644 --- a/clang/docs/ReleaseNotes.rst +++ b/clang/docs/ReleaseNotes.rst @@ -357,6 +357,50 @@ Non-comprehensive list of changes in this release - ``__builtin_reduce_add`` function can now be used in constant expressions. +- There is a new ``format_matches`` attribute to complement the existing + ``format`` attribute. ``format_matches`` allows the compiler to verify that + a format string argument is equivalent to a reference format string: it is + useful when a function accepts a format string without its accompanying + arguments to format. For instance: + + .. code-block:: c + + static int status_code; + static const char *status_string; + + void print_status(const char *fmt) { + fprintf(stderr, fmt, status_code, status_string); + // ^ warning: format string is not a string literal [-Wformat-nonliteral] + } + + void stuff(void) { + print_status("%s (%#08x)\n"); + // order of %s and %x is swapped but there is no diagnostic + } + + Before the introducion of ``format_matches``, this code cannot be verified + at compile-time. ``format_matches`` plugs that hole: + + .. code-block:: c + + __attribute__((format_matches(printf, 1, "%x %s"))) + void print_status(const char *fmt) { + fprintf(stderr, fmt, status_code, status_string); + // ^ `fmt` verified as if it was "%x %s" here; no longer triggers + // -Wformat-nonliteral, would warn if arguments did not match "%x %s" + } + + void stuff(void) { + print_status("%s (%#08x)\n"); + // warning: format specifier 's' is incompatible with 'x' + // warning: format specifier 'x' is incompatible with 's' + } + + Like with ``format``, the first argument is the format string flavor and the + second argument is the index of the format string parameter. + ``format_matches`` accepts an example valid format string as its third + argument. For more information, see the Clang attributes documentation. + New Compiler Flags ------------------ diff --git a/clang/include/clang/AST/FormatString.h b/clang/include/clang/AST/FormatString.h index a074dd23e2ad4c7..3560766433fe220 100644 --- a/clang/include/clang/AST/FormatString.h +++ b/clang/include/clang/AST/FormatString.h @@ -292,7 +292,7 @@ class ArgType { }; private: - const Kind K; + Kind K; QualType T; const char *Name = nullptr; bool Ptr = false; @@ -338,6 +338,7 @@ class ArgType { } MatchKind matchesType(ASTContext &C, QualType argTy) const; + MatchKind matchesArgType(ASTContext &C, const ArgType &other) const; QualType getRepresentativeType(ASTContext &C) const; diff --git a/clang/include/clang/Basic/Attr.td b/clang/include/clang/Basic/Attr.td index 6035a563d5fce77..7723e631aa2529c 100644 --- a/clang/include/clang/Basic/Attr.td +++ b/clang/include/clang/Basic/Attr.td @@ -1807,6 +1807,16 @@ def Format : InheritableAttr { let Documentation = [FormatDocs]; } +def FormatMatches : InheritableAttr { + let Spellings = [GCC<"format_matches">]; + let Args = [IdentifierArgument<"Type">, IntArgument<"FormatIdx">, ExprArgument<"ExpectedFormat">]; + let AdditionalMembers = [{ + StringLiteral *getFormatString() const; + }]; + let Subjects = SubjectList<[ObjCMethod, Block, HasFunctionProto]>; + let Documentation = [FormatMatchesDocs]; +} + def FormatArg : InheritableAttr { let Spellings = [GCC<"format_arg">]; let Args = [ParamIdxArgument<"FormatIdx">]; diff --git a/clang/include/clang/Basic/AttrDocs.td b/clang/include/clang/Basic/AttrDocs.td index 2fdceca163ee63b..2ca1bf1a454271a 100644 --- a/clang/include/clang/Basic/AttrDocs.td +++ b/clang/include/clang/Basic/AttrDocs.td @@ -3738,6 +3738,103 @@ behavior of the program is undefined. }]; } +def FormatMatchesDocs : Documentation { + let Category = DocCatFunction; + let Content = [{ + +The ``format`` attribute is the basis for the enforcement of diagnostics in the +``-Wformat`` family, but it only handles the case where the format string is +passed along with the arguments it is going to format. It cannot handle the case +where the format string and the format arguments are passed separately from each +other. For instance: + +.. code-block:: c + + static const char *first_name; + static double todays_temperature; + static int wind_speed; + + void say_hi(const char *fmt) { + printf(fmt, first_name, todays_temperature); // warning: format string is not a string literal + printf(fmt, first_name, wind_speed); // warning: format string is not a string literal + } + + int main() { + say_hi("hello %s, it is %g degrees outside"); + say_hi("hello %s, it is %d degrees outside!"); // no diagnostic + } + +In this example, ``fmt`` is expected to format a ``const char *`` and a +``double``, but these values are not passed to ``say_hi``. Without the +``format`` attribute (which cannot apply in this case), the -Wformat-nonliteral +diagnostic triggers in the body of ``say_hi``, and incorrect ``say_hi`` call +sites do not trigger a diagnostic. + +To complement the ``format`` attribute, Clang also defines the +``format_matches`` attribute. Its syntax is similar to the ``format`` +attribute's, but instead of taking the index of the first formatted value +argument, it takes a C string literal with the expected specifiers: + +.. code-block:: c + + static const char *first_name; + static double todays_temperature; + static int wind_speed; + + __attribute__((__format_matches__(printf, 1, "%s %g"))) + void say_hi(const char *fmt) { + printf(fmt, first_name, todays_temperature); // no dignostic + printf(fmt, first_name, wind_speed); // warning: format specifies type 'int' but the argument has type 'double' + } + + int main() { + say_hi("hello %s, it is %g degrees outside"); + say_hi("it is %g degrees outside, have a good day %s!"); + // warning: format specifies 'double' where 'const char *' is required + // warning: format specifies 'const char *' where 'double' is required + } + +The third argument to ``format_matches`` is expected to evaluate to a **C string +literal** even when the format string would normally be a different type for the +given flavor, like a ``CFStringRef`` or a ``NSString *``. + +In the implementation of a function with the ``format_matches`` attribute, +format verification works as if the format string was identical to the one +specified in the attribute. + +At the call sites of functions with the ``format_matches`` attribute, format +verification instead compares the two format strings to evaluate their +equivalence. Each format flavor defines equivalence between format specifiers. +Generally speaking, two specifiers are equivalent if they format the same type. +For instance, in the ``printf`` flavor, ``%2i`` and ``%-0.5d`` are compatible. +When ``-Wformat-signedness`` is disabled, ``%d`` and ``%u`` are compatible. For +a negative example, ``%ld`` is incompatible with ``%d``. + +Do note the following un-obvious cases: + +* Passing ``NULL`` as the format string is accepted without diagnostics. +* When the format string is not NULL, it cannot _miss_ specifiers, even in + trailing positions. For instance, ``%d`` is not accepted when the required + format is ``%d %d %d``. +* Specifiers for integers as small or smaller than ``int`` (such as ``%hhd``) + are all mutually compatible because standard argument promotion ensures that + integers are at least the size of ``int`` when passed as variadic arguments. + With ``-Wformat-signedness``, mixing specifier for types with a different + signedness still results in a diagnostic. +* Format strings expecting a variable modifier (such as ``%*s``) are + incompatible with format strings that would itemize the variable modifiers + (such as ``%i %s``), even if the two specify ABI-compatible argument lists. +* All pointer specifiers, modifiers aside, are mutually incompatible. For + instance, ``%s`` is not compatible with ``%p``, and ``%p`` is not compatible + with ``%n``, and ``%hhn`` is incompatible with ``%s``, even if the pointers + are ABI-compatible or identical on the selected platform. However, ``%0.5s`` + is compatible with ``%s``, since the difference only exists in modifier flags. + This is not overridable with ``-Wformat-pedantic`` or its inverse, which + control similar behavior in ``-Wformat``. + + }]; +} + def FlagEnumDocs : Documentation { let Category = DocCatDecl; let Content = [{ diff --git a/clang/include/clang/Basic/DiagnosticSemaKinds.td b/clang/include/clang/Basic/DiagnosticSemaKinds.td index 3caf471d3037f9e..0e7ffc95f8b53ef 100644 --- a/clang/include/clang/Basic/DiagnosticSemaKinds.td +++ b/clang/include/clang/Basic/DiagnosticSemaKinds.td @@ -7687,6 +7687,7 @@ def warn_format_nonliteral_noargs : Warning< def warn_format_nonliteral : Warning< "format string is not a string literal">, InGroup<FormatNonLiteral>, DefaultIgnore; +def err_format_nonliteral : Error<"format string is not a string literal">; def err_unexpected_interface : Error< "unexpected interface name %0: expected expression">; @@ -9998,6 +9999,8 @@ def note_previous_declaration_as : Note< def warn_printf_insufficient_data_args : Warning< "more '%%' conversions than data arguments">, InGroup<FormatInsufficientArgs>; +def warn_format_cmp_insufficient_specifiers : Warning< + "fewer specifiers in format string than expected">, InGroup<FormatInsufficientArgs>; def warn_printf_data_arg_not_used : Warning< "data argument not used by format string">, InGroup<FormatExtraArgs>; def warn_format_invalid_conversion : Warning< @@ -10115,6 +10118,24 @@ def note_format_fix_specifier : Note<"did you mean to use '%0'?">; def note_printf_c_str: Note<"did you mean to call the %0 method?">; def note_format_security_fixit: Note< "treat the string as an argument to avoid this">; +def warn_format_cmp_role_mismatch : Warning< + "format argument is %select{a value|an indirect field width|an indirect " + "precision|an auxiliary value}0, but it should be %select{a value|an indirect " + "field width|an indirect precision|an auxiliary value}1">, InGroup<Format>; +def warn_format_cmp_modifierfor_mismatch : Warning< + "format argument modifies specifier at position %0, but it should modify " + "specifier at position %1">, InGroup<Format>; +def warn_format_cmp_sensitivity_mismatch : Warning< + "argument sensitivity is %select{unspecified|private|public|sensitive}0, but " + "it should be %select{unspecified|private|public|sensitive}1">, InGroup<Format>; +def warn_format_cmp_specifier_mismatch : Warning< + "format specifier '%0' is incompatible with '%1'">, InGroup<Format>; +def warn_format_cmp_specifier_sign_mismatch : Warning< + "signedness of format specifier '%0' is incompatible with '%1'">, InGroup<Format>; +def warn_format_cmp_specifier_mismatch_pedantic : Extension< + warn_format_cmp_specifier_sign_mismatch.Summary>, InGroup<FormatPedantic>; +def note_format_cmp_with : Note< + "comparing with this %select{specifier|format string}0">; def warn_null_arg : Warning< "null passed to a callee that requires a non-null argument">, diff --git a/clang/include/clang/Sema/Sema.h b/clang/include/clang/Sema/Sema.h index d6f3508a5243f36..22df0bc2923db4e 100644 --- a/clang/include/clang/Sema/Sema.h +++ b/clang/include/clang/Sema/Sema.h @@ -2150,6 +2150,7 @@ class Sema final : public SemaBase { FAPK_Fixed, // values to format are fixed (no C-style variadic arguments) FAPK_Variadic, // values to format are passed as variadic arguments FAPK_VAList, // values to format are passed in a va_list + FAPK_Elsewhere, // values to format are not passed to this function }; // Used to grab the relevant information from a FormatAttr and a @@ -2160,12 +2161,15 @@ class Sema final : public SemaBase { FormatArgumentPassingKind ArgPassingKind; }; - /// Given a FunctionDecl's FormatAttr, attempts to populate the - /// FomatStringInfo parameter with the FormatAttr's correct format_idx and - /// firstDataArg. Returns true when the format fits the function and the - /// FormatStringInfo has been populated. - static bool getFormatStringInfo(const FormatAttr *Format, bool IsCXXMember, - bool IsVariadic, FormatStringInfo *FSI); + /// Given a function and its FormatAttr or FormatMatchesAttr info, attempts to + /// populate the FomatStringInfo parameter with the attribute's correct + /// format_idx and firstDataArg. Returns true when the format fits the + /// function and the FormatStringInfo has been populated. + static bool getFormatStringInfo(const Decl *Function, unsigned FormatIdx, + unsigned FirstArg, FormatStringInfo *FSI); + static bool getFormatStringInfo(unsigned FormatIdx, unsigned FirstArg, + bool IsCXXMember, bool IsVariadic, + FormatStringInfo *FSI); // Used by C++ template instantiation. ExprResult BuiltinShuffleVector(CallExpr *TheCall); @@ -2188,7 +2192,9 @@ class Sema final : public SemaBase { FST_Syslog, FST_Unknown }; + static FormatStringType GetFormatStringType(StringRef FormatFlavor); static FormatStringType GetFormatStringType(const FormatAttr *Format); + static FormatStringType GetFormatStringType(const FormatMatchesAttr *Format); bool FormatStringHasSArg(const StringLiteral *FExpr); @@ -2535,11 +2541,17 @@ class Sema final : public SemaBase { VariadicCallType CallType, SourceLocation Loc, SourceRange Range, llvm::SmallBitVector &CheckedVarArgs); + bool CheckFormatString(const FormatMatchesAttr *Format, + ArrayRef<const Expr *> Args, bool IsCXXMember, + VariadicCallType CallType, SourceLocation Loc, + SourceRange Range, + llvm::SmallBitVector &CheckedVarArgs); bool CheckFormatArguments(ArrayRef<const Expr *> Args, - FormatArgumentPassingKind FAPK, unsigned format_idx, - unsigned firstDataArg, FormatStringType Type, - VariadicCallType CallType, SourceLocation Loc, - SourceRange range, + FormatArgumentPassingKind FAPK, + const StringLiteral *ReferenceFormatString, + unsigned format_idx, unsigned firstDataArg, + FormatStringType Type, VariadicCallType CallType, + SourceLocation Loc, SourceRange range, llvm::SmallBitVector &CheckedVarArgs); void CheckInfNaNFunction(const CallExpr *Call, const FunctionDecl *FDecl); @@ -4527,6 +4539,11 @@ class Sema final : public SemaBase { FormatAttr *mergeFormatAttr(Decl *D, const AttributeCommonInfo &CI, IdentifierInfo *Format, int FormatIdx, int FirstArg); + FormatMatchesAttr *mergeFormatMatchesAttr(Decl *D, + const AttributeCommonInfo &CI, + IdentifierInfo *Format, + int FormatIdx, + StringLiteral *FormatStr); /// AddAlignedAttr - Adds an aligned attribute to a particular declaration. void AddAlignedAttr(Decl *D, const AttributeCommonInfo &CI, Expr *E, diff --git a/clang/lib/AST/AttrImpl.cpp b/clang/lib/AST/AttrImpl.cpp index f198a9acf8481fd..5a169f815637f6f 100644 --- a/clang/lib/AST/AttrImpl.cpp +++ b/clang/lib/AST/AttrImpl.cpp @@ -270,4 +270,9 @@ unsigned AlignedAttr::getAlignment(ASTContext &Ctx) const { return Ctx.getTargetDefaultAlignForAttributeAligned(); } +StringLiteral *FormatMatchesAttr::getFormatString() const { + // This cannot go in headers because StringLiteral and Expr may be incomplete. + return cast<StringLiteral>(getExpectedFormat()); +} + #include "clang/AST/AttrImpl.inc" diff --git a/clang/lib/AST/FormatString.cpp b/clang/lib/AST/FormatString.cpp index e892c1592df9869..5d3b56fc4e71377 100644 --- a/clang/lib/AST/FormatString.cpp +++ b/clang/lib/AST/FormatString.cpp @@ -595,6 +595,97 @@ ArgType::matchesType(ASTContext &C, QualType argTy) const { llvm_unreachable("Invalid ArgType Kind!"); } +static analyze_format_string::ArgType::MatchKind +integerTypeMatch(ASTContext &C, QualType A, QualType B, bool CheckSign) { + using MK = analyze_format_string::ArgType::MatchKind; + + uint64_t IntSize = C.getTypeSize(C.IntTy); + uint64_t ASize = C.getTypeSize(A); + uint64_t BSize = C.getTypeSize(B); + if (std::max(ASize, IntSize) != std::max(BSize, IntSize)) + return MK::NoMatch; + if (CheckSign && A->isSignedIntegerType() != B->isSignedIntegerType()) + return MK::NoMatchSignedness; + if (ASize != BSize) + return MK::MatchPromotion; + return MK::Match; +} + +analyze_format_string::ArgType::MatchKind +ArgType::matchesArgType(ASTContext &C, const ArgType &Other) const { + using AK = analyze_format_string::ArgType::Kind; + + // Per matchesType. + if (K == AK::InvalidTy || Other.K == AK::InvalidTy) + return NoMatch; + if (K == AK::UnknownTy || Other.K == AK::UnknownTy) + return Match; + + // Handle whether either (or both, or neither) sides has Ptr set, + // in addition to whether either (or both, or neither) sides is a SpecificTy + // that is a pointer. + ArgType Left = *this; + bool LeftWasPointer = false; + ArgType Right = Other; + bool RightWasPointer = false; + if (Left.Ptr) { + Left.Ptr = false; + LeftWasPointer = true; + } else if (Left.K == AK::SpecificTy && Left.T->isPointerType()) { + Left.T = Left.T->getPointeeType(); + LeftWasPointer = true; + } + if (Right.Ptr) { + Right.Ptr = false; + RightWasPointer = true; + } else if (Right.K == AK::SpecificTy && Right.T->isPointerType()) { + Right.T = Right.T->getPointeeType(); + RightWasPointer = true; + } + + if (LeftWasPointer != RightWasPointer) + return NoMatch; + + // Ensure that if at least one side is a SpecificTy, then Left is a + // SpecificTy. + if (Right.K == AK::SpecificTy) + std::swap(Left, Right); + + if (Left.K == AK::SpecificTy) { + if (Right.K == AK::SpecificTy) { + auto Canon1 = C.getCanonicalType(Left.T); + auto Canon2 = C.getCanonicalType(Right.T); + if (Canon1 == Canon2) + return Match; + + auto *BT1 = QualType(Canon1)->getAs<BuiltinType>(); + auto *BT2 = QualType(Canon2)->getAs<BuiltinType>(); + if (BT1 == nullptr || BT2 == nullptr) + return NoMatch; + if (BT1 == BT2) + return Match; + + if (!LeftWasPointer && BT1->isInteger() && BT2->isInteger()) + return integerTypeMatch(C, Canon1, Canon2, true); + return NoMatch; + } else if (Right.K == AK::AnyCharTy) { + if (!LeftWasPointer && Left.T->isIntegerType()) + return integerTypeMatch(C, Left.T, C.CharTy, false); + return NoMatch; + } else if (Right.K == AK::WIntTy) { + if (!LeftWasPointer && Left.T->isIntegerType()) + return integerTypeMatch(C, Left.T, C.WIntTy, true); + return NoMatch; + } + // It's hypothetically possible to create an AK::SpecificTy ArgType + // that matches another kind of ArgType, but in practice Clang doesn't + // do that, so ignore that case. + return NoMatch; + } + + return Left.K == Right.K ? Match : NoMatch; +} + ArgType ArgType::makeVectorType(ASTContext &C, unsigned NumElts) const { // Check for valid vector element types. if (T.isNull()) diff --git a/clang/lib/Sema/SemaChecking.cpp b/clang/lib/Sema/SemaChecking.cpp index 2d4a7cd287b70d7..27fd147fda7708b 100644 --- a/clang/lib/Sema/SemaChecking.cpp +++ b/clang/lib/Sema/SemaChecking.cpp @@ -3016,17 +3016,33 @@ bool Sema::ValueIsRunOfOnes(CallExpr *TheCall, unsigned ArgNum) { << ArgNum << Arg->getSourceRange(); } -bool Sema::getFormatStringInfo(const FormatAttr *Format, bool IsCXXMember, - bool IsVariadic, FormatStringInfo *FSI) { - if (Format->getFirstArg() == 0) +bool Sema::getFormatStringInfo(const Decl *D, unsigned FormatIdx, + unsigned FirstArg, FormatStringInfo *FSI) { + ... [truncated] `````````` </details> https://github.com/llvm/llvm-project/pull/116708 _______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits