Author: ziqingluo-90 Date: 2024-09-05T16:56:34-07:00 New Revision: d7dd2c468fecae871ba67e891a3519c758c94b63
URL: https://github.com/llvm/llvm-project/commit/d7dd2c468fecae871ba67e891a3519c758c94b63 DIFF: https://github.com/llvm/llvm-project/commit/d7dd2c468fecae871ba67e891a3519c758c94b63.diff LOG: Re-land "[-Wunsafe-buffer-usage] Warning Libc functions (#101583)" Revert commit 23457964392d00fc872fa6021763859024fb38da, and re-land with a new flag "-Wunsafe-buffer-usage-in-libc-call" for the new warning. (rdar://117182250) Added: clang/test/SemaCXX/warn-unsafe-buffer-usage-libc-functions-inline-namespace.cpp clang/test/SemaCXX/warn-unsafe-buffer-usage-libc-functions.cpp Modified: clang/include/clang/Analysis/Analyses/UnsafeBufferUsage.h clang/include/clang/Analysis/Analyses/UnsafeBufferUsageGadgets.def clang/include/clang/Basic/DiagnosticGroups.td clang/include/clang/Basic/DiagnosticSemaKinds.td clang/lib/Analysis/UnsafeBufferUsage.cpp clang/lib/Sema/AnalysisBasedWarnings.cpp clang/test/SemaCXX/warn-unsafe-buffer-usage-test-unreachable.cpp Removed: ################################################################################ diff --git a/clang/include/clang/Analysis/Analyses/UnsafeBufferUsage.h b/clang/include/clang/Analysis/Analyses/UnsafeBufferUsage.h index 228b4ae1e3e115..267cde64f8f23c 100644 --- a/clang/include/clang/Analysis/Analyses/UnsafeBufferUsage.h +++ b/clang/include/clang/Analysis/Analyses/UnsafeBufferUsage.h @@ -15,6 +15,7 @@ #define LLVM_CLANG_ANALYSIS_ANALYSES_UNSAFEBUFFERUSAGE_H #include "clang/AST/Decl.h" +#include "clang/AST/Expr.h" #include "clang/AST/Stmt.h" #include "clang/Basic/SourceLocation.h" #include "llvm/Support/Debug.h" @@ -106,6 +107,20 @@ class UnsafeBufferUsageHandler { virtual void handleUnsafeOperation(const Stmt *Operation, bool IsRelatedToDecl, ASTContext &Ctx) = 0; + /// Invoked when a call to an unsafe libc function is found. + /// \param PrintfInfo + /// is 0 if the callee function is not a member of the printf family; + /// is 1 if the callee is `sprintf`; + /// is 2 if arguments of the call have `__size_by` relation but are not in a + /// safe pattern; + /// is 3 if string arguments do not guarantee null-termination + /// is 4 if the callee takes va_list + /// \param UnsafeArg one of the actual arguments that is unsafe, non-null + /// only when `2 <= PrintfInfo <= 3` + virtual void handleUnsafeLibcCall(const CallExpr *Call, unsigned PrintfInfo, + ASTContext &Ctx, + const Expr *UnsafeArg = nullptr) = 0; + /// Invoked when an unsafe operation with a std container is found. virtual void handleUnsafeOperationInContainer(const Stmt *Operation, bool IsRelatedToDecl, @@ -151,6 +166,10 @@ class UnsafeBufferUsageHandler { virtual bool ignoreUnsafeBufferInContainer(const SourceLocation &Loc) const = 0; + /// \return true iff unsafe libc call should NOT be reported at `Loc` + virtual bool + ignoreUnsafeBufferInLibcCall(const SourceLocation &Loc) const = 0; + virtual std::string getUnsafeBufferUsageAttributeTextAt(SourceLocation Loc, StringRef WSSuffix = "") const = 0; diff --git a/clang/include/clang/Analysis/Analyses/UnsafeBufferUsageGadgets.def b/clang/include/clang/Analysis/Analyses/UnsafeBufferUsageGadgets.def index 242ad763ba62b9..09fa510bc0472e 100644 --- a/clang/include/clang/Analysis/Analyses/UnsafeBufferUsageGadgets.def +++ b/clang/include/clang/Analysis/Analyses/UnsafeBufferUsageGadgets.def @@ -18,10 +18,10 @@ #define WARNING_GADGET(name) GADGET(name) #endif -/// A `WARNING_GADGET` subset, where the code pattern of each gadget -/// corresponds uses of a (possibly hardened) contatiner (e.g., `std::span`). -#ifndef WARNING_CONTAINER_GADGET -#define WARNING_CONTAINER_GADGET(name) WARNING_GADGET(name) +/// A `WARNING_GADGET` subset, each of which may be enable/disable separately +/// with diff erent flags +#ifndef WARNING_OPTIONAL_GADGET +#define WARNING_OPTIONAL_GADGET(name) WARNING_GADGET(name) #endif /// Safe gadgets correspond to code patterns that aren't unsafe but need to be @@ -38,7 +38,8 @@ WARNING_GADGET(PointerArithmetic) WARNING_GADGET(UnsafeBufferUsageAttr) WARNING_GADGET(UnsafeBufferUsageCtorAttr) WARNING_GADGET(DataInvocation) -WARNING_CONTAINER_GADGET(SpanTwoParamConstructor) // Uses of `std::span(arg0, arg1)` +WARNING_OPTIONAL_GADGET(UnsafeLibcFunctionCall) +WARNING_OPTIONAL_GADGET(SpanTwoParamConstructor) // Uses of `std::span(arg0, arg1)` FIXABLE_GADGET(ULCArraySubscript) // `DRE[any]` in an Unspecified Lvalue Context FIXABLE_GADGET(DerefSimplePtrArithFixable) FIXABLE_GADGET(PointerDereference) @@ -52,5 +53,5 @@ FIXABLE_GADGET(PointerInit) #undef FIXABLE_GADGET #undef WARNING_GADGET -#undef WARNING_CONTAINER_GADGET +#undef WARNING_OPTIONAL_GADGET #undef GADGET diff --git a/clang/include/clang/Basic/DiagnosticGroups.td b/clang/include/clang/Basic/DiagnosticGroups.td index c4c29942ee1cbd..116ce7a04f66f7 100644 --- a/clang/include/clang/Basic/DiagnosticGroups.td +++ b/clang/include/clang/Basic/DiagnosticGroups.td @@ -1558,7 +1558,8 @@ def ReadOnlyPlacementChecks : DiagGroup<"read-only-types">; // Warnings and fixes to support the "safe buffers" programming model. def UnsafeBufferUsageInContainer : DiagGroup<"unsafe-buffer-usage-in-container">; -def UnsafeBufferUsage : DiagGroup<"unsafe-buffer-usage", [UnsafeBufferUsageInContainer]>; +def UnsafeBufferUsageInLibcCall : DiagGroup<"unsafe-buffer-usage-in-libc-call">; +def UnsafeBufferUsage : DiagGroup<"unsafe-buffer-usage", [UnsafeBufferUsageInContainer, UnsafeBufferUsageInLibcCall]>; // Warnings and notes related to the function effects system underlying // the nonblocking and nonallocating attributes. diff --git a/clang/include/clang/Basic/DiagnosticSemaKinds.td b/clang/include/clang/Basic/DiagnosticSemaKinds.td index 72ea5338ce615f..083684670a9805 100644 --- a/clang/include/clang/Basic/DiagnosticSemaKinds.td +++ b/clang/include/clang/Basic/DiagnosticSemaKinds.td @@ -12416,6 +12416,13 @@ def warn_unsafe_buffer_operation : Warning< "unsafe buffer access|function introduces unsafe buffer manipulation|unsafe invocation of span::data|" "field %1 prone to unsafe buffer manipulation}0">, InGroup<UnsafeBufferUsage>, DefaultIgnore; +def warn_unsafe_buffer_libc_call : Warning< + "function %0 is unsafe">, + InGroup<UnsafeBufferUsageInLibcCall>, DefaultIgnore; +def note_unsafe_buffer_printf_call : Note< + "%select{|change to 'snprintf' for explicit bounds checking | buffer pointer and size may not match" + "|string argument is not guaranteed to be null-terminated" + "|'va_list' is unsafe}0">; def note_unsafe_buffer_operation : Note< "used%select{| in pointer arithmetic| in buffer access}0 here">; def note_unsafe_buffer_variable_fixit_group : Note< diff --git a/clang/lib/Analysis/UnsafeBufferUsage.cpp b/clang/lib/Analysis/UnsafeBufferUsage.cpp index da7446913f7c87..ec76eae8b60776 100644 --- a/clang/lib/Analysis/UnsafeBufferUsage.cpp +++ b/clang/lib/Analysis/UnsafeBufferUsage.cpp @@ -10,12 +10,12 @@ #include "clang/AST/ASTContext.h" #include "clang/AST/Decl.h" #include "clang/AST/Expr.h" +#include "clang/AST/FormatString.h" #include "clang/AST/RecursiveASTVisitor.h" #include "clang/AST/Stmt.h" #include "clang/AST/StmtVisitor.h" #include "clang/ASTMatchers/ASTMatchFinder.h" #include "clang/ASTMatchers/ASTMatchers.h" -#include "clang/Basic/CharInfo.h" #include "clang/Basic/SourceLocation.h" #include "clang/Lex/Lexer.h" #include "clang/Lex/Preprocessor.h" @@ -247,6 +247,11 @@ AST_MATCHER_P(Stmt, ignoreUnsafeBufferInContainer, return Handler->ignoreUnsafeBufferInContainer(Node.getBeginLoc()); } +AST_MATCHER_P(Stmt, ignoreUnsafeLibcCall, const UnsafeBufferUsageHandler *, + Handler) { + return Handler->ignoreUnsafeBufferInLibcCall(Node.getBeginLoc()); +} + AST_MATCHER_P(CastExpr, castSubExpr, internal::Matcher<Expr>, innerMatcher) { return innerMatcher.matches(*Node.getSubExpr(), Finder, Builder); } @@ -443,6 +448,425 @@ AST_MATCHER(ArraySubscriptExpr, isSafeArraySubscript) { return false; } +AST_MATCHER_P(CallExpr, hasNumArgs, unsigned, Num) { + return Node.getNumArgs() == Num; +} + +namespace libc_func_matchers { +// Under `libc_func_matchers`, define a set of matchers that match unsafe +// functions in libc and unsafe calls to them. + +// A tiny parser to strip off common prefix and suffix of libc function names +// in real code. +// +// Given a function name, `matchName` returns `CoreName` according to the +// following grammar: +// +// LibcName := CoreName | CoreName + "_s" +// MatchingName := "__builtin_" + LibcName | +// "__builtin___" + LibcName + "_chk" | +// "__asan_" + LibcName +// +struct LibcFunNamePrefixSuffixParser { + StringRef matchName(StringRef FunName, bool isBuiltin) { + // Try to match __builtin_: + if (isBuiltin && FunName.starts_with("__builtin_")) + // Then either it is __builtin_LibcName or __builtin___LibcName_chk or + // no match: + return matchLibcNameOrBuiltinChk( + FunName.drop_front(10 /* truncate "__builtin_" */)); + // Try to match __asan_: + if (FunName.starts_with("__asan_")) + return matchLibcName(FunName.drop_front(7 /* truncate of "__asan_" */)); + return matchLibcName(FunName); + } + + // Parameter `Name` is the substring after stripping off the prefix + // "__builtin_". + StringRef matchLibcNameOrBuiltinChk(StringRef Name) { + if (Name.starts_with("__") && Name.ends_with("_chk")) + return matchLibcName( + Name.drop_front(2).drop_back(4) /* truncate "__" and "_chk" */); + return matchLibcName(Name); + } + + StringRef matchLibcName(StringRef Name) { + if (Name.ends_with("_s")) + return Name.drop_back(2 /* truncate "_s" */); + return Name; + } +}; + +// A pointer type expression is known to be null-terminated, if it has the +// form: E.c_str(), for any expression E of `std::string` type. +static bool isNullTermPointer(const Expr *Ptr) { + if (isa<StringLiteral>(Ptr->IgnoreParenImpCasts())) + return true; + if (isa<PredefinedExpr>(Ptr->IgnoreParenImpCasts())) + return true; + if (auto *MCE = dyn_cast<CXXMemberCallExpr>(Ptr->IgnoreParenImpCasts())) { + const CXXMethodDecl *MD = MCE->getMethodDecl(); + const CXXRecordDecl *RD = MCE->getRecordDecl()->getCanonicalDecl(); + + if (MD && RD && RD->isInStdNamespace()) + if (MD->getName() == "c_str" && RD->getName() == "basic_string") + return true; + } + return false; +} + +// Return true iff at least one of following cases holds: +// 1. Format string is a literal and there is an unsafe pointer argument +// corresponding to an `s` specifier; +// 2. Format string is not a literal and there is least an unsafe pointer +// argument (including the formatter argument). +// +// `UnsafeArg` is the output argument that will be set only if this function +// returns true. +static bool hasUnsafeFormatOrSArg(const CallExpr *Call, const Expr *&UnsafeArg, + const unsigned FmtArgIdx, ASTContext &Ctx, + bool isKprintf = false) { + class StringFormatStringHandler + : public analyze_format_string::FormatStringHandler { + const CallExpr *Call; + unsigned FmtArgIdx; + const Expr *&UnsafeArg; + + public: + StringFormatStringHandler(const CallExpr *Call, unsigned FmtArgIdx, + const Expr *&UnsafeArg) + : Call(Call), FmtArgIdx(FmtArgIdx), UnsafeArg(UnsafeArg) {} + + bool HandlePrintfSpecifier(const analyze_printf::PrintfSpecifier &FS, + const char *startSpecifier, + unsigned specifierLen, + const TargetInfo &Target) override { + if (FS.getConversionSpecifier().getKind() == + analyze_printf::PrintfConversionSpecifier::sArg) { + unsigned ArgIdx = FS.getPositionalArgIndex() + FmtArgIdx; + + if (0 < ArgIdx && ArgIdx < Call->getNumArgs()) + if (!isNullTermPointer(Call->getArg(ArgIdx))) { + UnsafeArg = Call->getArg(ArgIdx); // output + // returning false stops parsing immediately + return false; + } + } + return true; // continue parsing + } + }; + + const Expr *Fmt = Call->getArg(FmtArgIdx); + + if (auto *SL = dyn_cast<StringLiteral>(Fmt->IgnoreParenImpCasts())) { + StringRef FmtStr = SL->getString(); + StringFormatStringHandler Handler(Call, FmtArgIdx, UnsafeArg); + + return analyze_format_string::ParsePrintfString( + Handler, FmtStr.begin(), FmtStr.end(), Ctx.getLangOpts(), + Ctx.getTargetInfo(), isKprintf); + } + // If format is not a string literal, we cannot analyze the format string. + // In this case, this call is considered unsafe if at least one argument + // (including the format argument) is unsafe pointer. + return llvm::any_of( + llvm::make_range(Call->arg_begin() + FmtArgIdx, Call->arg_end()), + [&UnsafeArg](const Expr *Arg) -> bool { + if (Arg->getType()->isPointerType() && !isNullTermPointer(Arg)) { + UnsafeArg = Arg; + return true; + } + return false; + }); +} + +// Matches a FunctionDecl node such that +// 1. It's name, after stripping off predefined prefix and suffix, is +// `CoreName`; and +// 2. `CoreName` or `CoreName[str/wcs]` is one of the `PredefinedNames`, which +// is a set of libc function names. +// +// Note: For predefined prefix and suffix, see `LibcFunNamePrefixSuffixParser`. +// The notation `CoreName[str/wcs]` means a new name obtained from replace +// string "wcs" with "str" in `CoreName`. +AST_MATCHER(FunctionDecl, isPredefinedUnsafeLibcFunc) { + static std::unique_ptr<std::set<StringRef>> PredefinedNames = nullptr; + if (!PredefinedNames) + PredefinedNames = + std::make_unique<std::set<StringRef>, std::set<StringRef>>({ + // numeric conversion: + "atof", + "atoi", + "atol", + "atoll", + "strtol", + "strtoll", + "strtoul", + "strtoull", + "strtof", + "strtod", + "strtold", + "strtoimax", + "strtoumax", + // "strfromf", "strfromd", "strfroml", // C23? + // string manipulation: + "strcpy", + "strncpy", + "strlcpy", + "strcat", + "strncat", + "strlcat", + "strxfrm", + "strdup", + "strndup", + // string examination: + "strlen", + "strnlen", + "strcmp", + "strncmp", + "stricmp", + "strcasecmp", + "strcoll", + "strchr", + "strrchr", + "strspn", + "strcspn", + "strpbrk", + "strstr", + "strtok", + // "mem-" functions + "memchr", + "wmemchr", + "memcmp", + "wmemcmp", + "memcpy", + "memccpy", + "mempcpy", + "wmemcpy", + "memmove", + "wmemmove", + "memset", + "wmemset", + // IO: + "fread", + "fwrite", + "fgets", + "fgetws", + "gets", + "fputs", + "fputws", + "puts", + // others + "strerror_s", + "strerror_r", + "bcopy", + "bzero", + "bsearch", + "qsort", + }); + + auto *II = Node.getIdentifier(); + + if (!II) + return false; + + StringRef Name = LibcFunNamePrefixSuffixParser().matchName( + II->getName(), Node.getBuiltinID()); + + // Match predefined names: + if (PredefinedNames->find(Name) != PredefinedNames->end()) + return true; + + std::string NameWCS = Name.str(); + size_t WcsPos = NameWCS.find("wcs"); + + while (WcsPos != std::string::npos) { + NameWCS[WcsPos++] = 's'; + NameWCS[WcsPos++] = 't'; + NameWCS[WcsPos++] = 'r'; + WcsPos = NameWCS.find("wcs", WcsPos); + } + if (PredefinedNames->find(NameWCS) != PredefinedNames->end()) + return true; + // All `scanf` functions are unsafe (including `sscanf`, `vsscanf`, etc.. They + // all should end with "scanf"): + return Name.ends_with("scanf"); +} + +// Match a call to one of the `v*printf` functions taking `va_list`. We cannot +// check safety for these functions so they should be changed to their +// non-va_list versions. +AST_MATCHER(FunctionDecl, isUnsafeVaListPrintfFunc) { + auto *II = Node.getIdentifier(); + + if (!II) + return false; + + StringRef Name = LibcFunNamePrefixSuffixParser().matchName( + II->getName(), Node.getBuiltinID()); + + if (!Name.ends_with("printf")) + return false; // neither printf nor scanf + return Name.starts_with("v"); +} + +// Matches a call to one of the `sprintf` functions as they are always unsafe +// and should be changed to `snprintf`. +AST_MATCHER(FunctionDecl, isUnsafeSprintfFunc) { + auto *II = Node.getIdentifier(); + + if (!II) + return false; + + StringRef Name = LibcFunNamePrefixSuffixParser().matchName( + II->getName(), Node.getBuiltinID()); + + if (!Name.ends_with("printf") || + // Let `isUnsafeVaListPrintfFunc` check for cases with va-list: + Name.starts_with("v")) + return false; + + StringRef Prefix = Name.drop_back(6); + + if (Prefix.ends_with("w")) + Prefix = Prefix.drop_back(1); + return Prefix == "s"; +} + +// Match function declarations of `printf`, `fprintf`, `snprintf` and their wide +// character versions. Calls to these functions can be safe if their arguments +// are carefully made safe. +AST_MATCHER(FunctionDecl, isNormalPrintfFunc) { + auto *II = Node.getIdentifier(); + + if (!II) + return false; + + StringRef Name = LibcFunNamePrefixSuffixParser().matchName( + II->getName(), Node.getBuiltinID()); + + if (!Name.ends_with("printf") || Name.starts_with("v")) + return false; + + StringRef Prefix = Name.drop_back(6); + + if (Prefix.ends_with("w")) + Prefix = Prefix.drop_back(1); + + return Prefix.empty() || Prefix == "k" || Prefix == "f" || Prefix == "sn"; +} + +// This matcher requires that it is known that the callee `isNormalPrintf`. +// Then if the format string is a string literal, this matcher matches when at +// least one string argument is unsafe. If the format is not a string literal, +// this matcher matches when at least one pointer type argument is unsafe. +AST_MATCHER_P(CallExpr, hasUnsafePrintfStringArg, + clang::ast_matchers::internal::Matcher<Expr>, + UnsafeStringArgMatcher) { + // Determine what printf it is: + const Expr *FirstArg = Node.getArg(0); + ASTContext &Ctx = Finder->getASTContext(); + + if (isa<StringLiteral>(FirstArg->IgnoreParenImpCasts())) { + // It is a printf/kprintf. And, the format is a string literal: + bool isKprintf = false; + const Expr *UnsafeArg; + + if (auto *Callee = Node.getDirectCallee()) + if (auto *II = Callee->getIdentifier()) + isKprintf = II->getName() == "kprintf"; + if (hasUnsafeFormatOrSArg(&Node, UnsafeArg, 0, Ctx, isKprintf)) + return UnsafeStringArgMatcher.matches(*UnsafeArg, Finder, Builder); + return false; + } + + QualType PtrTy = FirstArg->getType(); + + assert(PtrTy->isPointerType()); + + QualType PteTy = (cast<PointerType>(PtrTy))->getPointeeType(); + + if (!Ctx.getFILEType().isNull() /* If `FILE *` is not ever in the ASTContext, + there can't be any file pointer then */ + && PteTy.getCanonicalType() == Ctx.getFILEType().getCanonicalType()) { + // It is a fprintf: + const Expr *UnsafeArg; + + if (hasUnsafeFormatOrSArg(&Node, UnsafeArg, 1, Ctx, false)) + return UnsafeStringArgMatcher.matches(*UnsafeArg, Finder, Builder); + return false; + } + + const Expr *SecondArg = Node.getArg(1); + + if (SecondArg->getType()->isIntegerType()) { + // It is a snprintf: + const Expr *UnsafeArg; + + if (hasUnsafeFormatOrSArg(&Node, UnsafeArg, 2, Ctx, false)) + return UnsafeStringArgMatcher.matches(*UnsafeArg, Finder, Builder); + return false; + } + // It is printf but the format string is passed by pointer. The only thing we + // can do is to require all pointers to be null-terminated: + for (auto Arg : Node.arguments()) + if (Arg->getType()->isPointerType() && !isNullTermPointer(Arg)) + if (UnsafeStringArgMatcher.matches(*Arg, Finder, Builder)) + return true; + return false; +} + +// This matcher requires that it is known that the callee `isNormalPrintf`. +// Then it matches if the first two arguments of the call is a pointer and an +// integer and they are not in a safe pattern. +// +// For the first two arguments: `ptr` and `size`, they are safe if in the +// following patterns: +// ptr := DRE.data(); +// size:= DRE.size()/DRE.size_bytes() +// And DRE is a hardened container or view. +AST_MATCHER(CallExpr, hasUnsafeSnprintfBuffer) { + if (Node.getNumArgs() < 3) + return false; // not an snprintf call + + const Expr *Buf = Node.getArg(0), *Size = Node.getArg(1); + + if (!Buf->getType()->isPointerType() || !Size->getType()->isIntegerType()) + return false; // not an snprintf call + + static StringRef SizedObjs[] = {"span", "array", "vector", + "basic_string_view", "basic_string"}; + Buf = Buf->IgnoreParenImpCasts(); + Size = Size->IgnoreParenImpCasts(); + if (auto *MCEPtr = dyn_cast<CXXMemberCallExpr>(Buf)) + if (auto *MCESize = dyn_cast<CXXMemberCallExpr>(Size)) { + auto *DREOfPtr = dyn_cast<DeclRefExpr>( + MCEPtr->getImplicitObjectArgument()->IgnoreParenImpCasts()); + auto *DREOfSize = dyn_cast<DeclRefExpr>( + MCESize->getImplicitObjectArgument()->IgnoreParenImpCasts()); + + if (!DREOfPtr || !DREOfSize) + return true; // not in safe pattern + if (DREOfPtr->getDecl() != DREOfSize->getDecl()) + return true; // not in safe pattern + if (MCEPtr->getMethodDecl()->getName() != "data") + return true; // not in safe pattern + + if (MCESize->getMethodDecl()->getName() == "size_bytes" || + // Note here the pointer must be a pointer-to-char type unless there + // is explicit casting. If there is explicit casting, this branch + // is unreachable. Thus, at this branch "size" and "size_bytes" are + // equivalent as the pointer is a char pointer: + MCESize->getMethodDecl()->getName() == "size") + for (StringRef SizedObj : SizedObjs) + if (MCEPtr->getRecordDecl()->isInStdNamespace() && + MCEPtr->getRecordDecl()->getCanonicalDecl()->getName() == + SizedObj) + return false; // It is in fact safe + } + return true; // ptr and size are not in safe pattern +} +} // namespace libc_func_matchers } // namespace clang::ast_matchers namespace { @@ -760,6 +1184,10 @@ class SpanTwoParamConstructorGadget : public WarningGadget { .bind(SpanTwoParamConstructorTag)); } + static Matcher matcher(const UnsafeBufferUsageHandler *Handler) { + return stmt(unless(ignoreUnsafeBufferInContainer(Handler)), matcher()); + } + void handleUnsafeOperation(UnsafeBufferUsageHandler &Handler, bool IsRelatedToDecl, ASTContext &Ctx) const override { @@ -1030,6 +1458,98 @@ class DataInvocationGadget : public WarningGadget { DeclUseList getClaimedVarUseSites() const override { return {}; } }; +class UnsafeLibcFunctionCallGadget : public WarningGadget { + const CallExpr *const Call; + const Expr *UnsafeArg = nullptr; + constexpr static const char *const Tag = "UnsafeLibcFunctionCall"; + // Extra tags for additional information: + constexpr static const char *const UnsafeSprintfTag = + "UnsafeLibcFunctionCall_sprintf"; + constexpr static const char *const UnsafeSizedByTag = + "UnsafeLibcFunctionCall_sized_by"; + constexpr static const char *const UnsafeStringTag = + "UnsafeLibcFunctionCall_string"; + constexpr static const char *const UnsafeVaListTag = + "UnsafeLibcFunctionCall_va_list"; + + enum UnsafeKind { + OTHERS = 0, // no specific information, the callee function is unsafe + SPRINTF = 1, // never call `-sprintf`s, call `-snprintf`s instead. + SIZED_BY = + 2, // the first two arguments of `snprintf` function have + // "__sized_by" relation but they do not conform to safe patterns + STRING = 3, // an argument is a pointer-to-char-as-string but does not + // guarantee null-termination + VA_LIST = 4, // one of the `-printf`s function that take va_list, which is + // considered unsafe as it is not compile-time check + } WarnedFunKind = OTHERS; + +public: + UnsafeLibcFunctionCallGadget(const MatchFinder::MatchResult &Result) + : WarningGadget(Kind::UnsafeLibcFunctionCall), + Call(Result.Nodes.getNodeAs<CallExpr>(Tag)) { + if (Result.Nodes.getNodeAs<Decl>(UnsafeSprintfTag)) + WarnedFunKind = SPRINTF; + else if (auto *E = Result.Nodes.getNodeAs<Expr>(UnsafeStringTag)) { + WarnedFunKind = STRING; + UnsafeArg = E; + } else if (Result.Nodes.getNodeAs<CallExpr>(UnsafeSizedByTag)) { + WarnedFunKind = SIZED_BY; + UnsafeArg = Call->getArg(0); + } else if (Result.Nodes.getNodeAs<Decl>(UnsafeVaListTag)) + WarnedFunKind = VA_LIST; + } + + static Matcher matcher(const UnsafeBufferUsageHandler *Handler) { + return stmt(unless(ignoreUnsafeLibcCall(Handler)), + anyOf( + callExpr( + callee(functionDecl(anyOf( + // Match a predefined unsafe libc + // function: + functionDecl(libc_func_matchers::isPredefinedUnsafeLibcFunc()), + // Match a call to one of the `v*printf` functions + // taking va-list, which cannot be checked at + // compile-time: + functionDecl(libc_func_matchers::isUnsafeVaListPrintfFunc()) + .bind(UnsafeVaListTag), + // Match a call to a `sprintf` function, which is never + // safe: + functionDecl(libc_func_matchers::isUnsafeSprintfFunc()) + .bind(UnsafeSprintfTag)))), + // (unless the call has a sole string literal argument): + unless( + allOf(hasArgument(0, expr(stringLiteral())), hasNumArgs(1)))), + + // The following two cases require checking against actual + // arguments of the call: + + // Match a call to an `snprintf` function. And first two + // arguments of the call (that describe a buffer) are not in + // safe patterns: + callExpr(callee(functionDecl(libc_func_matchers::isNormalPrintfFunc())), + libc_func_matchers::hasUnsafeSnprintfBuffer()) + .bind(UnsafeSizedByTag), + // Match a call to a `printf` function, which can be safe if + // all arguments are null-terminated: + callExpr(callee(functionDecl(libc_func_matchers::isNormalPrintfFunc())), + libc_func_matchers::hasUnsafePrintfStringArg( + expr().bind(UnsafeStringTag))))); + } + + const Stmt *getBaseStmt() const { return Call; } + + SourceLocation getSourceLoc() const override { return Call->getBeginLoc(); } + + void handleUnsafeOperation(UnsafeBufferUsageHandler &Handler, + bool IsRelatedToDecl, + ASTContext &Ctx) const override { + Handler.handleUnsafeLibcCall(Call, WarnedFunKind, Ctx, UnsafeArg); + } + + DeclUseList getClaimedVarUseSites() const override { return {}; } +}; + // Represents expressions of the form `DRE[*]` in the Unspecified Lvalue // Context (see `isInUnspecifiedLvalueContext`). // Note here `[]` is the built-in subscript operator. @@ -1452,10 +1972,9 @@ findGadgets(const Decl *D, const UnsafeBufferUsageHandler &Handler, #define WARNING_GADGET(x) \ allOf(x ## Gadget::matcher().bind(#x), \ notInSafeBufferOptOut(&Handler)), -#define WARNING_CONTAINER_GADGET(x) \ - allOf(x ## Gadget::matcher().bind(#x), \ - notInSafeBufferOptOut(&Handler), \ - unless(ignoreUnsafeBufferInContainer(&Handler))), +#define WARNING_OPTIONAL_GADGET(x) \ + allOf(x ## Gadget::matcher(&Handler).bind(#x), \ + notInSafeBufferOptOut(&Handler)), #include "clang/Analysis/Analyses/UnsafeBufferUsageGadgets.def" // Avoid a hanging comma. unless(stmt()) diff --git a/clang/lib/Sema/AnalysisBasedWarnings.cpp b/clang/lib/Sema/AnalysisBasedWarnings.cpp index e6ce89dc7ec406..117b2c8bc57935 100644 --- a/clang/lib/Sema/AnalysisBasedWarnings.cpp +++ b/clang/lib/Sema/AnalysisBasedWarnings.cpp @@ -2304,6 +2304,20 @@ class UnsafeBufferUsageReporter : public UnsafeBufferUsageHandler { } } + void handleUnsafeLibcCall(const CallExpr *Call, unsigned PrintfInfo, + ASTContext &Ctx, + const Expr *UnsafeArg = nullptr) override { + S.Diag(Call->getBeginLoc(), diag::warn_unsafe_buffer_libc_call) + << Call->getDirectCallee() // We've checked there is a direct callee + << Call->getSourceRange(); + if (PrintfInfo > 0) { + SourceRange R = + UnsafeArg ? UnsafeArg->getSourceRange() : Call->getSourceRange(); + S.Diag(R.getBegin(), diag::note_unsafe_buffer_printf_call) + << PrintfInfo << R; + } + } + void handleUnsafeOperationInContainer(const Stmt *Operation, bool IsRelatedToDecl, ASTContext &Ctx) override { @@ -2382,6 +2396,10 @@ class UnsafeBufferUsageReporter : public UnsafeBufferUsageHandler { return S.Diags.isIgnored(diag::warn_unsafe_buffer_usage_in_container, Loc); } + bool ignoreUnsafeBufferInLibcCall(const SourceLocation &Loc) const override { + return S.Diags.isIgnored(diag::warn_unsafe_buffer_libc_call, Loc); + } + // Returns the text representation of clang::unsafe_buffer_usage attribute. // `WSSuffix` holds customized "white-space"s, e.g., newline or whilespace // characters. @@ -2548,6 +2566,8 @@ void clang::sema::AnalysisBasedWarnings::IssueWarnings( !Diags.isIgnored(diag::warn_unsafe_buffer_variable, Node->getBeginLoc()) || !Diags.isIgnored(diag::warn_unsafe_buffer_usage_in_container, + Node->getBeginLoc()) || + !Diags.isIgnored(diag::warn_unsafe_buffer_libc_call, Node->getBeginLoc())) { clang::checkUnsafeBufferUsage(Node, R, UnsafeBufferUsageShouldEmitSuggestions); @@ -2560,7 +2580,8 @@ void clang::sema::AnalysisBasedWarnings::IssueWarnings( if (!Diags.isIgnored(diag::warn_unsafe_buffer_operation, SourceLocation()) || !Diags.isIgnored(diag::warn_unsafe_buffer_variable, SourceLocation()) || !Diags.isIgnored(diag::warn_unsafe_buffer_usage_in_container, - SourceLocation())) { + SourceLocation()) || + !Diags.isIgnored(diag::warn_unsafe_buffer_libc_call, SourceLocation())) { CallableVisitor(CallAnalyzers).TraverseTranslationUnitDecl(TU); } } diff --git a/clang/test/SemaCXX/warn-unsafe-buffer-usage-libc-functions-inline-namespace.cpp b/clang/test/SemaCXX/warn-unsafe-buffer-usage-libc-functions-inline-namespace.cpp new file mode 100644 index 00000000000000..2bd12db93fd526 --- /dev/null +++ b/clang/test/SemaCXX/warn-unsafe-buffer-usage-libc-functions-inline-namespace.cpp @@ -0,0 +1,60 @@ +// RUN: %clang_cc1 -std=c++20 -Wno-all -Wunsafe-buffer-usage \ +// RUN: -verify %s + +namespace std { + inline namespace __1 { + template< class InputIt, class OutputIt > + OutputIt copy( InputIt first, InputIt last, + OutputIt d_first ); + + struct iterator{}; + template<typename T> + struct span { + T * ptr; + T * data(); + unsigned size_bytes(); + unsigned size(); + iterator begin() const noexcept; + iterator end() const noexcept; + }; + + template<typename T> + struct basic_string { + T* p; + T *c_str(); + T *data(); + unsigned size_bytes(); + }; + + typedef basic_string<char> string; + typedef basic_string<wchar_t> wstring; + + // C function under std: + void memcpy(); + void strcpy(); + int snprintf( char* buffer, unsigned buf_size, const char* format, ... ); + } +} + +void f(char * p, char * q, std::span<char> s) { + std::memcpy(); // expected-warning{{function 'memcpy' is unsafe}} + std::strcpy(); // expected-warning{{function 'strcpy' is unsafe}} + std::__1::memcpy(); // expected-warning{{function 'memcpy' is unsafe}} + std::__1::strcpy(); // expected-warning{{function 'strcpy' is unsafe}} + + /* Test printfs */ + std::snprintf(s.data(), 10, "%s%d", "hello", *p); // expected-warning{{function 'snprintf' is unsafe}} expected-note{{buffer pointer and size may not match}} + std::__1::snprintf(s.data(), 10, "%s%d", "hello", *p); // expected-warning{{function 'snprintf' is unsafe}} expected-note{{buffer pointer and size may not match}} + std::snprintf(s.data(), s.size_bytes(), "%s%d", "hello", *p); // no warn + std::__1::snprintf(s.data(), s.size_bytes(), "%s%d", "hello", *p); // no warn +} + +void v(std::string s1) { + std::snprintf(s1.data(), s1.size_bytes(), "%s%d", s1.c_str(), 0); // no warn + std::__1::snprintf(s1.data(), s1.size_bytes(), "%s%d", s1.c_str(), 0); // no warn +} + +void g(char *begin, char *end, char *p, std::span<char> s) { + std::copy(begin, end, p); // no warn + std::copy(s.begin(), s.end(), s.begin()); // no warn +} diff --git a/clang/test/SemaCXX/warn-unsafe-buffer-usage-libc-functions.cpp b/clang/test/SemaCXX/warn-unsafe-buffer-usage-libc-functions.cpp new file mode 100644 index 00000000000000..0438f71b1c7922 --- /dev/null +++ b/clang/test/SemaCXX/warn-unsafe-buffer-usage-libc-functions.cpp @@ -0,0 +1,124 @@ +// RUN: %clang_cc1 -std=c++20 -Wno-all -Wunsafe-buffer-usage \ +// RUN: -verify %s +// RUN: %clang_cc1 -std=c++20 -Wno-all -Wunsafe-buffer-usage-in-libc-call \ +// RUN: -verify %s + +typedef struct {} FILE; +void memcpy(); +void __asan_memcpy(); +void strcpy(); +void strcpy_s(); +void wcscpy_s(); +unsigned strlen( const char* str ); +int fprintf( FILE* stream, const char* format, ... ); +int printf( const char* format, ... ); +int sprintf( char* buffer, const char* format, ... ); +int swprintf( char* buffer, const char* format, ... ); +int snprintf( char* buffer, unsigned buf_size, const char* format, ... ); +int snwprintf( char* buffer, unsigned buf_size, const char* format, ... ); +int snwprintf_s( char* buffer, unsigned buf_size, const char* format, ... ); +int vsnprintf( char* buffer, unsigned buf_size, const char* format, ... ); +int sscanf_s(const char * buffer, const char * format, ...); +int sscanf(const char * buffer, const char * format, ... ); + +namespace std { + template< class InputIt, class OutputIt > + OutputIt copy( InputIt first, InputIt last, + OutputIt d_first ); + + struct iterator{}; + template<typename T> + struct span { + T * ptr; + T * data(); + unsigned size_bytes(); + unsigned size(); + iterator begin() const noexcept; + iterator end() const noexcept; + }; + + template<typename T> + struct basic_string { + T* p; + T *c_str(); + T *data(); + unsigned size_bytes(); + }; + + typedef basic_string<char> string; + typedef basic_string<wchar_t> wstring; + + // C function under std: + void memcpy(); + void strcpy(); +} + +void f(char * p, char * q, std::span<char> s, std::span<char> s2) { + memcpy(); // expected-warning{{function 'memcpy' is unsafe}} + std::memcpy(); // expected-warning{{function 'memcpy' is unsafe}} + __builtin_memcpy(p, q, 64); // expected-warning{{function '__builtin_memcpy' is unsafe}} + __builtin___memcpy_chk(p, q, 8, 64); // expected-warning{{function '__builtin___memcpy_chk' is unsafe}} + __asan_memcpy(); // expected-warning{{function '__asan_memcpy' is unsafe}} + strcpy(); // expected-warning{{function 'strcpy' is unsafe}} + std::strcpy(); // expected-warning{{function 'strcpy' is unsafe}} + strcpy_s(); // expected-warning{{function 'strcpy_s' is unsafe}} + wcscpy_s(); // expected-warning{{function 'wcscpy_s' is unsafe}} + + + /* Test printfs */ + fprintf((FILE*)p, "%s%d", p, *p); // expected-warning{{function 'fprintf' is unsafe}} expected-note{{string argument is not guaranteed to be null-terminated}} + printf("%s%d", // expected-warning{{function 'printf' is unsafe}} + p, // expected-note{{string argument is not guaranteed to be null-terminated}} note attached to the unsafe argument + *p); + sprintf(q, "%s%d", "hello", *p); // expected-warning{{function 'sprintf' is unsafe}} expected-note{{change to 'snprintf' for explicit bounds checking}} + swprintf(q, "%s%d", "hello", *p); // expected-warning{{function 'swprintf' is unsafe}} expected-note{{change to 'snprintf' for explicit bounds checking}} + snprintf(q, 10, "%s%d", "hello", *p); // expected-warning{{function 'snprintf' is unsafe}} expected-note{{buffer pointer and size may not match}} + snprintf(s.data(), s2.size(), "%s%d", "hello", *p); // expected-warning{{function 'snprintf' is unsafe}} expected-note{{buffer pointer and size may not match}} + snwprintf(s.data(), s2.size(), "%s%d", "hello", *p); // expected-warning{{function 'snwprintf' is unsafe}} expected-note{{buffer pointer and size may not match}} + snwprintf_s( // expected-warning{{function 'snwprintf_s' is unsafe}} + s.data(), // expected-note{{buffer pointer and size may not match}} // note attached to the buffer + s2.size(), + "%s%d", "hello", *p); + vsnprintf(s.data(), s.size_bytes(), "%s%d", "hello", *p); // expected-warning{{function 'vsnprintf' is unsafe}} expected-note{{'va_list' is unsafe}} + sscanf(p, "%s%d", "hello", *p); // expected-warning{{function 'sscanf' is unsafe}} + sscanf_s(p, "%s%d", "hello", *p); // expected-warning{{function 'sscanf_s' is unsafe}} + fprintf((FILE*)p, "%P%d%p%i hello world %32s", *p, *p, p, *p, p); // expected-warning{{function 'fprintf' is unsafe}} expected-note{{string argument is not guaranteed to be null-terminated}} + fprintf((FILE*)p, "%P%d%p%i hello world %32s", *p, *p, p, *p, "hello"); // no warn + printf("%s%d", "hello", *p); // no warn + snprintf(s.data(), s.size_bytes(), "%s%d", "hello", *p); // no warn + snprintf(s.data(), s.size_bytes(), "%s%d", __PRETTY_FUNCTION__, *p); // no warn + snwprintf(s.data(), s.size_bytes(), "%s%d", __PRETTY_FUNCTION__, *p); // no warn + snwprintf_s(s.data(), s.size_bytes(), "%s%d", __PRETTY_FUNCTION__, *p); // no warn + strlen("hello");// no warn +} + +void v(std::string s1, int *p) { + snprintf(s1.data(), s1.size_bytes(), "%s%d%s%p%s", __PRETTY_FUNCTION__, *p, "hello", p, s1.c_str()); // no warn + snprintf(s1.data(), s1.size_bytes(), s1.c_str(), __PRETTY_FUNCTION__, *p, "hello", s1.c_str()); // no warn + printf("%s%d%s%p%s", __PRETTY_FUNCTION__, *p, "hello", p, s1.c_str()); // no warn + printf(s1.c_str(), __PRETTY_FUNCTION__, *p, "hello", s1.c_str()); // no warn + fprintf((FILE*)0, "%s%d%s%p%s", __PRETTY_FUNCTION__, *p, "hello", p, s1.c_str()); // no warn + fprintf((FILE*)0, s1.c_str(), __PRETTY_FUNCTION__, *p, "hello", s1.c_str()); // no warn +} + + +void g(char *begin, char *end, char *p, std::span<char> s) { + std::copy(begin, end, p); // no warn + std::copy(s.begin(), s.end(), s.begin()); // no warn +} + +// warning gets turned off +void ff(char * p, char * q, std::span<char> s, std::span<char> s2) { +#pragma clang diagnostic push +#pragma clang diagnostic ignored "-Wunsafe-buffer-usage-in-libc-call" + memcpy(); + std::memcpy(); + __builtin_memcpy(p, q, 64); + __builtin___memcpy_chk(p, q, 8, 64); + __asan_memcpy(); + strcpy(); + std::strcpy(); + strcpy_s(); + wcscpy_s(); +#pragma clang diagnostic pop +} diff --git a/clang/test/SemaCXX/warn-unsafe-buffer-usage-test-unreachable.cpp b/clang/test/SemaCXX/warn-unsafe-buffer-usage-test-unreachable.cpp index 844311c3a51a58..989931e41c0cc1 100644 --- a/clang/test/SemaCXX/warn-unsafe-buffer-usage-test-unreachable.cpp +++ b/clang/test/SemaCXX/warn-unsafe-buffer-usage-test-unreachable.cpp @@ -1,8 +1,6 @@ // RUN: %clang_cc1 -std=c++20 -Wunsafe-buffer-usage -fsafe-buffer-usage-suggestions -verify %s -// expected-no-diagnostics - typedef unsigned __darwin_size_t; typedef __darwin_size_t size_t; #define bzero(s, n) __builtin_bzero(s, n) -void __nosan_bzero(void *dst, size_t sz) { bzero(dst, sz); } +void __nosan_bzero(void *dst, size_t sz) { bzero(dst, sz); } // expected-warning{{function '__builtin_bzero' is unsafe}} _______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits