https://github.com/ColinKinloch updated https://github.com/llvm/llvm-project/pull/177286
>From 6d340f8c2bb04f6646e920d262214c04ef538cc0 Mon Sep 17 00:00:00 2001 From: Colin Kinloch <[email protected]> Date: Wed, 21 Jan 2026 03:45:12 +0000 Subject: [PATCH 1/2] [clang][Sema] Add libc correctness warnings Define as builtin and add correctness checks for: * `open` / `open64` / `openat` / `openat64` * `umask` * `realpath` --- clang/include/clang/Basic/BuiltinHeaders.def | 2 + clang/include/clang/Basic/Builtins.td | 50 +++ clang/include/clang/Basic/DiagnosticGroups.td | 3 + .../clang/Basic/DiagnosticSemaKinds.td | 25 ++ clang/lib/Sema/SemaChecking.cpp | 294 ++++++++++++++++++ clang/test/Analysis/unix-api.c | 2 +- clang/test/Analysis/unix-api.cpp | 2 +- clang/test/Analysis/unix-fns-o_creat.c | 2 +- clang/test/Analysis/unix-fns.c | 4 +- clang/test/Sema/enable_if.c | 4 +- clang/test/Sema/enum-attr.c | 2 +- clang/test/Sema/type-dependent-attrs.c | 2 +- clang/test/Sema/warn-libc.c | 83 +++++ clang/utils/TableGen/ClangBuiltinsEmitter.cpp | 1 + 14 files changed, 467 insertions(+), 9 deletions(-) create mode 100644 clang/test/Sema/warn-libc.c diff --git a/clang/include/clang/Basic/BuiltinHeaders.def b/clang/include/clang/Basic/BuiltinHeaders.def index d6012a896eca9..985d98404555c 100644 --- a/clang/include/clang/Basic/BuiltinHeaders.def +++ b/clang/include/clang/Basic/BuiltinHeaders.def @@ -17,6 +17,7 @@ HEADER(BLOCKS_H, "Blocks.h") HEADER(COMPLEX_H, "complex.h") HEADER(CTYPE_H, "ctype.h") HEADER(EMMINTRIN_H, "emmintrin.h") +HEADER(FCNTL_H, "fcntl.h") HEADER(FOUNDATION_NSOBJCRUNTIME_H, "Foundation/NSObjCRuntime.h") HEADER(IMMINTRIN_H, "immintrin.h") HEADER(INTRIN_H, "intrin.h") @@ -37,6 +38,7 @@ HEADER(STDIO_H, "stdio.h") HEADER(STDLIB_H, "stdlib.h") HEADER(STRINGS_H, "strings.h") HEADER(STRING_H, "string.h") +HEADER(SYS_STAT_H, "sys/stat.h") HEADER(UNISTD_H, "unistd.h") HEADER(UTILITY, "utility") HEADER(WCHAR_H, "wchar.h") diff --git a/clang/include/clang/Basic/Builtins.td b/clang/include/clang/Basic/Builtins.td index 9e00f3aa3020d..9d9863d36b703 100644 --- a/clang/include/clang/Basic/Builtins.td +++ b/clang/include/clang/Basic/Builtins.td @@ -3539,6 +3539,56 @@ def StrnCaseCmp : GNULibBuiltin<"strings.h"> { let RequiresUndef = 1; } +// POSIX fcntl.h + +def Open : LibBuiltin<"fcntl.h"> { + let Spellings = ["open"]; + let Attributes = [NoThrow]; + let Prototype = "int(char const*, int, ...)"; + let AddBuiltinPrefixedAlias = 1; +} + +def Open64 : LibBuiltin<"fcntl.h"> { + let Spellings = ["open64"]; + let Attributes = [NoThrow]; + let Prototype = "int(char const*, int, ...)"; + let AddBuiltinPrefixedAlias = 1; +} + +def OpenAt : LibBuiltin<"fcntl.h"> { + let Spellings = ["openat"]; + let Attributes = [NoThrow]; + let Prototype = "int(int, char const*, int, ...)"; + let AddBuiltinPrefixedAlias = 1; +} + +def OpenAt64 : LibBuiltin<"fcntl.h"> { + let Spellings = ["openat64"]; + let Attributes = [NoThrow]; + let Prototype = "int(int, char const*, int, ...)"; + let AddBuiltinPrefixedAlias = 1; +} + +// POSIX stat.h + +def UMask : LibBuiltin<"sys/stat.h"> { + let Spellings = ["umask"]; + let Attributes = [NoThrow]; + let Prototype = "mode_t(mode_t)"; + let AddBuiltinPrefixedAlias = 1; +} + +// POSIX stdlib.h + +def RealPath : LibBuiltin<"stdlib.h"> { + let Spellings = ["realpath"]; + let Attributes = [NoThrow, NonNull<NonOptimizing, [0]>]; + let Prototype = "char*(char const* restrict, char* restrict)"; + let AddBuiltinPrefixedAlias = 1; +} + +// POSIX unistd.h + def GNU_Exit : GNULibBuiltin<"unistd.h"> { let Spellings = ["_exit"]; let Attributes = [NoReturn]; diff --git a/clang/include/clang/Basic/DiagnosticGroups.td b/clang/include/clang/Basic/DiagnosticGroups.td index de1d1e13ea712..84ed2d7d49dd5 100644 --- a/clang/include/clang/Basic/DiagnosticGroups.td +++ b/clang/include/clang/Basic/DiagnosticGroups.td @@ -1802,3 +1802,6 @@ def TrivialAutoVarInit : DiagGroup<"trivial-auto-var-init">; // A warning for options that enable a feature that is not yet complete def ExperimentalOption : DiagGroup<"experimental-option">; + +// Warnings about incorrect libc usage +def IncorrectLibcUse : DiagGroup<"incorrect-libc-use">; diff --git a/clang/include/clang/Basic/DiagnosticSemaKinds.td b/clang/include/clang/Basic/DiagnosticSemaKinds.td index a2be7ab3791b9..1e741037dbf64 100644 --- a/clang/include/clang/Basic/DiagnosticSemaKinds.td +++ b/clang/include/clang/Basic/DiagnosticSemaKinds.td @@ -1031,6 +1031,31 @@ def err_ptrauth_indirect_goto_addrlabel_arithmetic : Error< "%select{subtraction|addition}0 of address-of-label expressions is not " "supported with ptrauth indirect gotos">; +// libc checks +def warn_surplus_args : Warning<"too many arguments passed to '%0'; it expects " + "a maximum of %1 variadic parameter">, + InGroup<IncorrectLibcUse>; +def warn_open_create_file_without_mode + : Warning<"nonzero 'mode' argument must be specified as the flag%s0 " + "'%1'%select{|| and '%2'}0 would result in file creation">, + InGroup<IncorrectLibcUse>; +def warn_open_superfluous_mode + : Warning<"nonzero 'mode' argument was specified but is unnecessary for " + "specified 'flags'">, + InGroup<IncorrectLibcUse>; + +def warn_libc_invalid_mode_t : Warning<"invalid mode">, + InGroup<IncorrectLibcUse>; + +def warn_path_max_overflow : Warning<"'%0' distination buffer needs to be " + "larger than than PATH_MAX bytes (%2)," + " but buffer is %1">, + InGroup<IncorrectLibcUse>; + +def warn_pollfd_nfds : Warning<"the element count value '%0' is higher than " + "the number of elements in the array '%1'">, + InGroup<IncorrectLibcUse>; + // __ptrauth qualifier def err_ptrauth_qualifier_invalid : Error< "%select{return type|parameter type|property}1 may not be qualified with " diff --git a/clang/lib/Sema/SemaChecking.cpp b/clang/lib/Sema/SemaChecking.cpp index 8e40364faf66c..683ba4efe6424 100644 --- a/clang/lib/Sema/SemaChecking.cpp +++ b/clang/lib/Sema/SemaChecking.cpp @@ -23,6 +23,7 @@ #include "clang/AST/DeclCXX.h" #include "clang/AST/DeclObjC.h" #include "clang/AST/DeclarationName.h" +#include "clang/AST/DynamicRecursiveASTVisitor.h" #include "clang/AST/EvaluatedExprVisitor.h" #include "clang/AST/Expr.h" #include "clang/AST/ExprCXX.h" @@ -56,6 +57,8 @@ #include "clang/Basic/TargetInfo.h" #include "clang/Basic/TypeTraits.h" #include "clang/Lex/Lexer.h" // TODO: Extract static functions to fix layering. +#include "clang/Lex/MacroInfo.h" +#include "clang/Lex/Preprocessor.h" #include "clang/Sema/Initialization.h" #include "clang/Sema/Lookup.h" #include "clang/Sema/Ownership.h" @@ -1140,6 +1143,152 @@ static bool ProcessFormatStringLiteral(const Expr *FormatExpr, return false; } +static std::optional<int> getPathMaxValue(const ASTContext &Ctx) { + if (Ctx.getTargetInfo().getTriple().isOSGlibc()) + return {4096}; + + if (Ctx.getTargetInfo().getTriple().isOSDarwin()) + return {1024}; + + return std::nullopt; +} + +/* Follow simple references to other macros so we can match the Expr spelling */ +static const MacroInfo *resolveMacroChainAtLoc(Preprocessor &PP, + const IdentifierInfo *MII, + SourceLocation Loc) { + auto *MI = PP.getMacroDefinitionAtLoc(MII, Loc).getMacroInfo(); + if (!MI) + return nullptr; + const IdentifierInfo *MIIN = MII; + while (MI->getNumTokens() == 1 && MI->tokens_begin()->is(tok::identifier) && + (MIIN = MI->tokens_begin()->getIdentifierInfo()) && + MIIN->hasMacroDefinition()) { + MacroDefinition MacroDef = PP.getMacroDefinitionAtLoc(MIIN, Loc); + MI = MacroDef.getMacroInfo(); + } + return MI; +} + +// Search subexpressions for macros and attempt to evaluate them +class MacroFlagMatcher : public ConstDynamicRecursiveASTVisitor { + const Sema &S; + llvm::SmallVectorImpl<std::pair<const IdentifierInfo *, const MacroInfo *>> + &Macros; + std::map<const IdentifierInfo *, int64_t> &FoundMacros; + std::optional<SourceRange> LastMatchRange; + +public: + MacroFlagMatcher(const Sema &S, + llvm::SmallVectorImpl<std::pair<const IdentifierInfo *, + const MacroInfo *>> &Macros, + std::map<const IdentifierInfo *, int64_t> &FoundMacros) + : S(S), Macros(Macros), FoundMacros(FoundMacros) {} + + bool VisitExpr(const Expr *E) override { + if (isa<ParenExpr>(E) || isa<IntegerLiteral>(E)) { + // Check location + SourceLocation SpLoc = S.SourceMgr.getSpellingLoc(E->getExprLoc()); + for (auto *M = Macros.begin(); M != Macros.end();) { + if (M->second->tokens_begin()->getLocation() == SpLoc) { + LastMatchRange = E->getSourceRange(); + + Expr::EvalResult Result; + const Expr *SizeArg = E; + if (SizeArg->EvaluateAsInt(Result, S.getASTContext())) { + FoundMacros.insert( + std::pair(M->first, Result.Val.getInt().getExtValue())); + M = Macros.erase(M); + continue; + } + } + ++M; + } + } + + return !Macros.empty(); + } + + bool dataTraverseStmtPre(const Stmt *Statement) override { + /* Ignore the contents of a matched macro */ + return !(LastMatchRange && + (*LastMatchRange).fullyContains(Statement->getSourceRange())); + } + + static void getExpr(const Expr *E, ArrayRef<const IdentifierInfo *> Macros, + const Sema &S, + std::map<const IdentifierInfo *, int64_t> &FoundMacros) { + SmallVector<std::pair<const IdentifierInfo *, const MacroInfo *>, 2> + MacrosLoc; + for (auto &MII : Macros) { + auto *MI = resolveMacroChainAtLoc(S.PP, MII, E->getExprLoc()); + if (MI) + MacrosLoc.push_back(std::make_pair(MII, MI)); + } + MacroFlagMatcher Visitor(S, MacrosLoc, FoundMacros); + Visitor.TraverseStmt(E->getExprStmt()); + } +}; + +static std::optional<int> +evaluateSimpleMacroAtLocation(Preprocessor &PP, const IdentifierInfo *MacroII, + SourceLocation Loc) { + auto *MI = resolveMacroChainAtLoc(PP, MacroII, Loc); + if (!MI) + return std::nullopt; + + // Fast path for single digit integer + if (MI->getNumTokens() == 1) { + const Token &T = MI->tokens().back(); + if (T.getLength() == 1 || T.getKind() == tok::binary_data) { + const uint8_t Val = PP.getSpellingOfSingleCharacterNumericConstant(T); + return llvm::APInt(8, Val, /*isSigned=*/true).getSExtValue(); + } + } + + // Filter out parens. + std::vector<Token> FilteredTokens; + FilteredTokens.reserve(MI->tokens().size()); + for (auto &T : MI->tokens()) + if (!T.isOneOf(tok::l_paren, tok::r_paren)) + FilteredTokens.push_back(T); + + // Parse an integer at the end of the macro definition. + const Token &T = FilteredTokens.back(); + + if (!T.isLiteral()) + return std::nullopt; + + bool InvalidSpelling = false; + SmallVector<char> Buffer(T.getLength()); + // `Preprocessor::getSpelling` can get the spelling of the token regardless of + // whether the macro is defined in a PCH or not: + StringRef ValueStr = PP.getSpelling(T, Buffer, &InvalidSpelling); + + if (InvalidSpelling) + return std::nullopt; + + llvm::APSInt IntValue(/*BitWidth=*/0, /*isUnsigned=*/true); + constexpr unsigned AutoSenseRadix = 0; + if (ValueStr.getAsInteger(AutoSenseRadix, + static_cast<llvm::APInt &>(IntValue))) + return std::nullopt; + + // Parse an optional minus sign. + size_t Size = FilteredTokens.size(); + if (Size >= 2) { + if (FilteredTokens[Size - 2].is(tok::minus)) { + // Make sure there's space for a sign bit + if (IntValue.isSignBitSet()) + IntValue = IntValue.extend(IntValue.getBitWidth() + 1); + IntValue.setIsUnsigned(false); + IntValue = -IntValue; + } + } + + return IntValue.getExtValue(); +} + void Sema::checkFortifiedBuiltinMemoryFunction(FunctionDecl *FD, CallExpr *TheCall) { if (TheCall->isValueDependent() || TheCall->isTypeDependent() || @@ -1240,6 +1389,21 @@ void Sema::checkFortifiedBuiltinMemoryFunction(FunctionDecl *FD, return llvm::APSInt::getUnsigned(Result + 1).extOrTrunc(SizeTypeWidth); }; + auto DiagnoseBigMode = [&](unsigned ModeArgIndex) { + const Expr *ModeArg = TheCall->getArg(ModeArgIndex); + Expr::EvalResult Result; + uint32_t Mode = 0; + + if (ModeArg->EvaluateAsInt(Result, Context)) { + Mode = Result.Val.getInt().getExtValue(); + if ((Mode & ~0777) > 0) { + DiagRuntimeBehavior(TheCall->getBeginLoc(), TheCall, + PDiag(diag::warn_libc_invalid_mode_t) + << ModeArg->getSourceRange()); + } + } + }; + std::optional<llvm::APSInt> SourceSize; std::optional<llvm::APSInt> DestinationSize; unsigned DiagID = 0; @@ -1450,6 +1614,136 @@ void Sema::checkFortifiedBuiltinMemoryFunction(FunctionDecl *FD, } } DestinationSize = ComputeSizeArgument(0); + break; + } + + /* incorrect-libc-use start */ + case Builtin::BIumask: + case Builtin::BI__builtin_umask: { + DiagnoseBigMode(0); + break; + } + + case Builtin::BIopen: + case Builtin::BI__builtin_open: + case Builtin::BIopen64: + case Builtin::BI__builtin_open64: + case Builtin::BIopenat: + case Builtin::BI__builtin_openat: + case Builtin::BIopenat64: + case Builtin::BI__builtin_openat64: { + /* The param count is the index of the first variadic argument (mode) */ + unsigned ModeIndex = UseDecl->getNumParams(); + assert(TheCall->getNumArgs() >= ModeIndex); + unsigned NumVarArgs = TheCall->getNumArgs() - ModeIndex; + + if (NumVarArgs > 1) { + DiagRuntimeBehavior(TheCall->getBeginLoc(), TheCall, + PDiag(diag::warn_surplus_args) + << GetFunctionName() << 1); + } + + std::optional<int> Flags; + const Expr *FlagsArg = TheCall->getArg(ModeIndex - 1); + Expr::EvalResult Result; + + const Expr *ModeArg = nullptr; + + std::optional<int> Mode; + + if (NumVarArgs >= 1) { + /* GNU libc accepts modes outside the 0777 range */ + if (!Context.getTargetInfo().getTriple().isOSLinux()) + DiagnoseBigMode(ModeIndex); + + ModeArg = TheCall->getArg(ModeIndex); + Expr::EvalResult Result; + if (ModeArg->EvaluateAsInt(Result, Context)) { + Mode = Result.Val.getInt().getExtValue(); + } + } + + if (FlagsArg->EvaluateAsInt(Result, Context)) + Flags = Result.Val.getInt().getExtValue(); + + if (!Flags) + break; + + int64_t OCreatValue = 0; + int64_t OTmpFileValue = 0; + + bool IsOCreat = false; + bool IsOTmpFile = false; + bool ExpectsMode = false; + + const IdentifierInfo *OCreatII = PP.getIdentifierInfo("O_CREAT"); + const IdentifierInfo *OTmpFileII = PP.getIdentifierInfo("O_TMPFILE"); + + OCreatValue = + evaluateSimpleMacroAtLocation(PP, OCreatII, FlagsArg->getExprLoc()) + .value_or(0); + OTmpFileValue = + evaluateSimpleMacroAtLocation(PP, OTmpFileII, FlagsArg->getExprLoc()) + .value_or(0); + + // Fallback to searching the argument for an expression to evaluate + if (!OCreatValue || !OTmpFileValue) { + std::map<const IdentifierInfo *, int64_t> FoundMacros; + MacroFlagMatcher::getExpr(FlagsArg, {OCreatII, OTmpFileII}, *this, + FoundMacros); + + auto OCreatIt = FoundMacros.find(OCreatII); + auto OTmpFileIt = FoundMacros.find(OTmpFileII); + + if (OCreatIt != FoundMacros.end()) + OCreatValue = OCreatIt->second; + if (OTmpFileIt != FoundMacros.end()) + OTmpFileValue = OTmpFileIt->second; + } + + IsOCreat = OCreatValue && (*Flags & OCreatValue) == OCreatValue; + IsOTmpFile = OTmpFileValue && (*Flags & OTmpFileValue) == OTmpFileValue; + ExpectsMode = IsOCreat || IsOTmpFile; + + /* check if mode should be present for flags */ + + // If we failed to evaluate the flags don't diagnose + if (!OCreatValue && !OTmpFileValue) + break; + + if (ExpectsMode && !Mode) { + int Count = 0; + if (IsOCreat) + Count++; + if (IsOTmpFile) + Count++; + auto D = PDiag(diag::warn_open_create_file_without_mode) + << FlagsArg->getSourceRange() << Count; + if (IsOCreat) + D << OCreatII->getName(); + if (IsOTmpFile) + D << OTmpFileII->getName(); + DiagRuntimeBehavior(TheCall->getBeginLoc(), TheCall, D); + } else if (!ExpectsMode && (Mode && *Mode != 0)) { + DiagRuntimeBehavior(TheCall->getBeginLoc(), TheCall, + PDiag(diag::warn_open_superfluous_mode) + << FlagsArg->getSourceRange() + << ModeArg->getSourceRange()); + } + break; + } + + case Builtin::BIrealpath: + case Builtin::BI__builtin_realpath: { + DiagID = diag::warn_fortify_source_overflow; + std::optional<int> PathMax = getPathMaxValue(Context); + DiagID = diag::warn_path_max_overflow; + if (PathMax) + SourceSize = + llvm::APSInt::getUnsigned(*PathMax).extOrTrunc(SizeTypeWidth); + DestinationSize = ComputeSizeArgument(TheCall->getNumArgs() - 1); + + break; } } diff --git a/clang/test/Analysis/unix-api.c b/clang/test/Analysis/unix-api.c index 64ff3c0fccf42..4361e1c6481c6 100644 --- a/clang/test/Analysis/unix-api.c +++ b/clang/test/Analysis/unix-api.c @@ -1,4 +1,4 @@ -// RUN: %clang_analyze_cc1 -analyzer-checker=core,unix.API -verify %s +// RUN: %clang_analyze_cc1 -analyzer-checker=core,unix.API -Wno-incorrect-libc-use -verify %s #ifndef O_RDONLY #define O_RDONLY 0 diff --git a/clang/test/Analysis/unix-api.cpp b/clang/test/Analysis/unix-api.cpp index 2b07d8807c1f2..36f495cdd0654 100644 --- a/clang/test/Analysis/unix-api.cpp +++ b/clang/test/Analysis/unix-api.cpp @@ -1,4 +1,4 @@ -// RUN: %clang_analyze_cc1 -analyzer-checker=core,unix.API -verify %s +// RUN: %clang_analyze_cc1 -analyzer-checker=core,unix.API -Wno-incorrect-libc-use -verify %s extern "C" { #ifndef O_RDONLY #define O_RDONLY 0 diff --git a/clang/test/Analysis/unix-fns-o_creat.c b/clang/test/Analysis/unix-fns-o_creat.c index 76df3851cfc9c..0d17bb43c74f5 100644 --- a/clang/test/Analysis/unix-fns-o_creat.c +++ b/clang/test/Analysis/unix-fns-o_creat.c @@ -1,4 +1,4 @@ -// RUN: %clang_analyze_cc1 -verify -analyzer-checker=core,unix.API -analyzer-output=text %s +// RUN: %clang_analyze_cc1 -verify -analyzer-checker=core,unix.API -Wno-incorrect-libc-use -analyzer-output=text %s // Verify that the UnixAPIChecker finds the missing mode value regardless // of the particular values of these macros, particularly O_CREAT. diff --git a/clang/test/Analysis/unix-fns.c b/clang/test/Analysis/unix-fns.c index 77894285bcb69..2356d1480aaac 100644 --- a/clang/test/Analysis/unix-fns.c +++ b/clang/test/Analysis/unix-fns.c @@ -1,6 +1,6 @@ -// RUN: %clang_analyze_cc1 -triple x86_64-apple-darwin10 -analyzer-checker=core,unix.API,osx.API,optin.portability %s -analyzer-output=plist -analyzer-config faux-bodies=true -fblocks -verify -o %t.plist +// RUN: %clang_analyze_cc1 -triple x86_64-apple-darwin10 -analyzer-checker=core,unix.API,osx.API,optin.portability %s -analyzer-output=plist -analyzer-config faux-bodies=true -fblocks -Wno-incorrect-libc-use -verify -o %t.plist // RUN: %normalize_plist <%t.plist | diff -ub %S/Inputs/expected-plists/unix-fns.c.plist - -// RUN: %clang_analyze_cc1 -triple x86_64-unknown-linux -analyzer-checker=core,unix.API,osx.API,optin.portability %s -analyzer-output=plist -analyzer-config faux-bodies=true -fblocks -verify -o %t.plist +// RUN: %clang_analyze_cc1 -triple x86_64-unknown-linux -analyzer-checker=core,unix.API,osx.API,optin.portability %s -analyzer-output=plist -analyzer-config faux-bodies=true -fblocks -Wno-incorrect-libc-use -verify -o %t.plist // RUN: %normalize_plist <%t.plist | diff -ub %S/Inputs/expected-plists/unix-fns.c.plist - // RUN: mkdir -p %t.dir // RUN: %clang_analyze_cc1 -analyzer-checker=core,unix.API,osx.API,optin.portability -analyzer-output=html -analyzer-config faux-bodies=true -fblocks -o %t.dir %s diff --git a/clang/test/Sema/enable_if.c b/clang/test/Sema/enable_if.c index 3ef8310a2fef7..a8e8d96b74503 100644 --- a/clang/test/Sema/enable_if.c +++ b/clang/test/Sema/enable_if.c @@ -1,5 +1,5 @@ -// RUN: %clang_cc1 %s -verify -// RUN: %clang_cc1 %s -DCODEGEN -emit-llvm -o - | FileCheck %s +// RUN: %clang_cc1 -fno-builtin-open %s -verify +// RUN: %clang_cc1 -fno-builtin-open %s -DCODEGEN -emit-llvm -o - | FileCheck %s #define O_CREAT 0x100 typedef int mode_t; diff --git a/clang/test/Sema/enum-attr.c b/clang/test/Sema/enum-attr.c index 1cdd7028e03e0..5a439f51d5912 100644 --- a/clang/test/Sema/enum-attr.c +++ b/clang/test/Sema/enum-attr.c @@ -1,4 +1,4 @@ -// RUN: %clang_cc1 -fsyntax-only -verify -Wassign-enum -Wswitch-enum -Wcovered-switch-default %s +// RUN: %clang_cc1 -fno-builtin-open -fsyntax-only -verify -Wassign-enum -Wswitch-enum -Wcovered-switch-default %s enum Enum { A0 = 1, A1 = 10 diff --git a/clang/test/Sema/type-dependent-attrs.c b/clang/test/Sema/type-dependent-attrs.c index 13068b3f94ad4..915190d07adf0 100644 --- a/clang/test/Sema/type-dependent-attrs.c +++ b/clang/test/Sema/type-dependent-attrs.c @@ -1,4 +1,4 @@ -// RUN: %clang_cc1 -std=c23 -fsyntax-only -verify %s +// RUN: %clang_cc1 -fno-builtin-open -std=c23 -fsyntax-only -verify %s int open() { return 0; } void close(typeof(open()) *) {} diff --git a/clang/test/Sema/warn-libc.c b/clang/test/Sema/warn-libc.c new file mode 100644 index 0000000000000..607cf4a75e891 --- /dev/null +++ b/clang/test/Sema/warn-libc.c @@ -0,0 +1,83 @@ +// RUN: %clang_cc1 -triple x86_64-apple-macosx10.14.0 %s -verify +// RUN: %clang_cc1 -triple x86_64-linux %s -verify +// RUN: %clang_cc1 -triple x86_64-linux %s -verify -DO_CREAT='(32 | __linux__)' + + +#define FAUX_CREATE 0100 +#if O_CREAT != FAUX_CREATE +void call_open_no_creat(void) { + __builtin_open("name", FAUX_CREATE, 0777); + __builtin_open("name", FAUX_CREATE); +} +#endif + +#define O_RDONLY 0 +#define O_WRONLY 01 +#define O_RDWR 02 +#ifndef O_CREAT +#define O_CREAT 0100 +#endif +#define __O_DIRECTORY 0x10000 +#define __O_TMPFILE (020000000 | __O_DIRECTORY) +#define O_TMPFILE __O_TMPFILE /* Atomically create nameless file. */ + +void call_open(void) { +#if O_CREAT == 64 + __builtin_open("name", 64); // expected-warning {{nonzero 'mode' argument must be specified as the flag 'O_CREAT' would result in file creation}} +#endif + __builtin_open("name", O_TMPFILE | O_RDONLY); // expected-warning {{nonzero 'mode' argument must be specified as the flag 'O_TMPFILE' would result in file creation}} + __builtin_open("name", O_TMPFILE + O_RDONLY); // expected-warning {{nonzero 'mode' argument must be specified as the flag 'O_TMPFILE' would result in file creation}} + __builtin_open("name", O_TMPFILE); // expected-warning {{nonzero 'mode' argument must be specified as the flag 'O_TMPFILE' would result in file creation}} + __builtin_open("name", O_CREAT); // expected-warning {{nonzero 'mode' argument must be specified as the flag 'O_CREAT' would result in file creation}} + __builtin_open("name", O_CREAT | O_TMPFILE); // expected-warning {{nonzero 'mode' argument must be specified as the flags 'O_CREAT' and 'O_TMPFILE' would result in file creation}} + __builtin_open("name", O_CREAT | O_TMPFILE, 0777); + __builtin_open("name", O_CREAT | O_TMPFILE, 0777, 0); // expected-warning {{too many arguments passed to 'open'; it expects a maximum of 1 variadic parameter}} + __builtin_open("name", O_CREAT | O_TMPFILE, 0777, 0, 0); // expected-warning {{too many arguments passed to 'open'; it expects a maximum of 1 variadic parameter}} +} + +#ifdef __cplusplus +extern "C" { +#endif + +typedef unsigned int uint32_t; +typedef uint32_t mode_t; + +mode_t umode(mode_t); +int open(const char *pathname, int flags, ... /* mode_t mode */ ); +int open64(const char *pathname, int flags, ... /* mode_t mode */ ); +int openat(int fddir, const char *pathname, int flags, ... /* mode_t mode */ ); +int openat64(int fddir, const char *pathname, int flags, ... /* mode_t mode */ ); + +#ifdef __cplusplus +} +#endif + +void call_openat(void) { + __builtin_openat(0, "name", O_CREAT, 0777); + __builtin_openat(0, "name", O_CREAT, 01000); +#if !defined(__linux__) + // expected-warning@-2{{invalid mode}} +#endif +} + +void call_umask(void) { + __builtin_umask(0); + __builtin_umask(0777); + __builtin_umask(01000); // expected-warning {{invalid mode}} +} + +#if defined(__APPLE__) +#define PATH_MAX 1024 +#elif defined(__linux__) +#define PATH_MAX 4096 +#endif + +void call_realpath() { + char too_small[PATH_MAX - 1]; + char too_big[PATH_MAX + 1]; + char too_just_right[PATH_MAX]; + + __builtin_realpath("hah", too_small); // expected-warning-re {{'realpath' distination buffer needs to be larger than than PATH_MAX bytes ({{[0-9]+}}), but buffer is {{[0-9]+}}}} + __builtin_realpath("hah", too_big); + __builtin_realpath("hah", too_just_right); +} diff --git a/clang/utils/TableGen/ClangBuiltinsEmitter.cpp b/clang/utils/TableGen/ClangBuiltinsEmitter.cpp index fb089a811ef92..fb5829ced5f55 100644 --- a/clang/utils/TableGen/ClangBuiltinsEmitter.cpp +++ b/clang/utils/TableGen/ClangBuiltinsEmitter.cpp @@ -356,6 +356,7 @@ class PrototypeParser { .Case("int32_t", "Zi") .Case("int64_t", "Wi") .Case("jmp_buf", "J") + .Case("mode_t", "UZi") .Case("msint32_t", "Ni") .Case("msuint32_t", "UNi") .Case("objc_super", "M") >From 6d641fe9842542cd2d94210bdd278ba77559912e Mon Sep 17 00:00:00 2001 From: Colin Kinloch <[email protected]> Date: Wed, 21 Jan 2026 03:49:17 +0000 Subject: [PATCH 2/2] [clang][Sema] Add bounds checking for libc poll Manually identify a call to the libc `poll` function and verify that the fd count argument isn't greater than the number of elements in the fds array. --- clang/lib/Sema/SemaChecking.cpp | 126 ++++++++++++++++++++++++++++++++ clang/test/Sema/warn-libc.c | 42 +++++++++++ 2 files changed, 168 insertions(+) diff --git a/clang/lib/Sema/SemaChecking.cpp b/clang/lib/Sema/SemaChecking.cpp index 683ba4efe6424..5b7934d0e0def 100644 --- a/clang/lib/Sema/SemaChecking.cpp +++ b/clang/lib/Sema/SemaChecking.cpp @@ -4563,6 +4563,125 @@ void Sema::CheckConstructorCall(FunctionDecl *FDecl, QualType ThisType, Loc, SourceRange(), CallType); } +static std::optional<llvm::APSInt> GetArrayElementCount(Sema &S, + const Expr *BaseExpr) { + const Type *EffectiveType = + BaseExpr->getType()->getPointeeOrArrayElementType(); + if (EffectiveType->isDependentType()) + return {}; + + BaseExpr = BaseExpr->IgnoreParenCasts(); + const ConstantArrayType *ArrayTy = + S.Context.getAsConstantArrayType(BaseExpr->getType()); + + if (!ArrayTy) + return {}; + + const Type *BaseType = ArrayTy->getElementType().getTypePtr(); + + if (BaseType->isDependentType() || BaseType->isIncompleteType()) + return {}; + + LangOptions::StrictFlexArraysLevelKind StrictFlexArraysLevel = + S.getLangOpts().getStrictFlexArraysLevel(); + + if (BaseExpr->isFlexibleArrayMemberLike( + S.Context, StrictFlexArraysLevel, + /*IgnoreTemplateOrMacroSubstitution=*/true)) + return {}; + + llvm::APInt ArrayTySize = ArrayTy->getSize(); + if (BaseType != EffectiveType) { + // Make sure we're comparing apples to apples when comparing index to + // size. + uint64_t ptrarith_typesize = S.Context.getTypeSize(EffectiveType); + uint64_t array_typesize = S.Context.getTypeSize(BaseType); + + // Handle ptrarith_typesize being zero, such as when casting to void*. + // Use the size in bits (what "getTypeSize()" returns) rather than bytes. + if (!ptrarith_typesize) + ptrarith_typesize = S.Context.getCharWidth(); + + if (ptrarith_typesize != array_typesize) { + // There's a cast to a different size type involved. + uint64_t ratio = array_typesize / ptrarith_typesize; + + // TODO: Be smarter about handling cases where array_typesize is not a + // multiple of ptrarith_typesize. + if (ptrarith_typesize * ratio == array_typesize) + ArrayTySize *= llvm::APInt(ArrayTySize.getBitWidth(), ratio); + } + } + + return llvm::APSInt(std::move(ArrayTySize)); +} + +static bool CheckLibcPoll(Sema &S, FunctionDecl *FDecl, CallExpr *TheCall) { + // Check that the function resembles libc poll + if (!S.getSourceManager().isInSystemHeader(FDecl->getLocation())) + return false; + + if (TheCall->getNumArgs() != 3) + return false; + + if (!FDecl->getReturnType()->isSignedIntegerType()) + return false; + + const IdentifierTable::iterator It = S.Context.Idents.find("pollfd"); + + // If we can't find pollfd cancel the check + if (It == S.Context.Idents.end()) + return false; + + const IdentifierInfo *II = It->second; + + Expr *FdsArg = TheCall->getArg(0); + QualType FdsType = FdsArg->getType(); + + if (!FdsType->isPointerOrReferenceType() && !FdsType->isArrayType()) + return false; + + const Type *elType = FdsType->getPointeeOrArrayElementType(); + + if (!elType->isRecordType()) + return false; + + const RecordDecl *RD = elType->getAsRecordDecl(); + if (II != RD->getIdentifier()) + return false; + + // Check size type + Expr *NfdsArg = TheCall->getArg(1); + auto &ExpectedNfdsType = S.Context.UnsignedLongTy; + if (S.Context.getTargetInfo().getTriple().isOSDarwin()) + ExpectedNfdsType = S.Context.UnsignedIntTy; + + if (!S.Context.hasSameType(NfdsArg->getType().getUnqualifiedType(), + ExpectedNfdsType)) + return false; + + Expr::EvalResult Result; + if (!NfdsArg->EvaluateAsInt(Result, S.getASTContext())) + return false; + llvm::APSInt NfdsValue = Result.Val.getInt(); + NfdsValue.setIsUnsigned(true); + + std::optional<llvm::APSInt> FdsElCount = GetArrayElementCount(S, FdsArg); + + if (FdsElCount) { + if (llvm::APSInt::compareValues(NfdsValue, *FdsElCount) > 0) { + SmallString<16> FdsElCountStr; + SmallString<16> NfdsValueStr; + FdsElCount->toString(FdsElCountStr, /*Radix=*/10); + NfdsValue.toString(NfdsValueStr, /*Radix=*/10); + S.Diag(TheCall->getBeginLoc(), diag::warn_pollfd_nfds) + << NfdsValueStr << FdsElCountStr; + } + } + + return true; +} + bool Sema::CheckFunctionCall(FunctionDecl *FDecl, CallExpr *TheCall, const FunctionProtoType *Proto) { bool IsMemberOperatorCall = isa<CXXOperatorCallExpr>(TheCall) && @@ -4621,6 +4740,13 @@ bool Sema::CheckFunctionCall(FunctionDecl *FDecl, CallExpr *TheCall, CheckMaxUnsignedZero(TheCall, FDecl); CheckInfNaNFunction(TheCall, FDecl); + if (FDecl->isExternC()) { + const IdentifierInfo *II = FDecl->getIdentifier(); + if (II->isStr("poll")) { + CheckLibcPoll(*this, FDecl, TheCall); + } + } + if (getLangOpts().ObjC) ObjC().DiagnoseCStringFormatDirectiveInCFAPI(FDecl, Args, NumArgs); diff --git a/clang/test/Sema/warn-libc.c b/clang/test/Sema/warn-libc.c index 607cf4a75e891..77ee9505d764e 100644 --- a/clang/test/Sema/warn-libc.c +++ b/clang/test/Sema/warn-libc.c @@ -81,3 +81,45 @@ void call_realpath() { __builtin_realpath("hah", too_big); __builtin_realpath("hah", too_just_right); } + +# 1 "poll.h" 1 3 +# 1 "sys/poll.h" 1 3 + +#if defined(__APPLE__) +typedef unsigned int nfds_t; +#elif defined(__linux__) +typedef unsigned long int nfds_t; +#endif + +struct pollfd { + int fd; + short events; + short revents; +}; +extern int poll (struct pollfd *__fds, nfds_t __nfds, int __timeout); + +# 2 "poll_test.c" 2 + +#define __builtin_poll poll + +void call_poll(void) { + struct pollfd fds[] = { + {0, 0, 0}, + {0, 0, 0}, + {0, 0, 0}, + {0, 0, 0}, + {0, 0, 0}, + {0, 0, 0}, + {0, 0, 0}, + {0, 0, 0}, + {0, 0, 0}, + }; + const nfds_t nfds = sizeof(fds) / sizeof(*fds); + __builtin_poll(fds, nfds, 0); + __builtin_poll(fds, nfds + 1, 0); // expected-warning {{the element count value '10' is higher than the number of elements in the array '9'}} + __builtin_poll(fds, nfds - 1, 0); + /* Unhandled errors */ + __builtin_poll(&fds[1], nfds, 0); + __builtin_poll(fds + 1, nfds, 0); + __builtin_poll(fds - 1, nfds, 0); +} _______________________________________________ cfe-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
