https://github.com/ColinKinloch updated 
https://github.com/llvm/llvm-project/pull/177286

>From 6d340f8c2bb04f6646e920d262214c04ef538cc0 Mon Sep 17 00:00:00 2001
From: Colin Kinloch <[email protected]>
Date: Wed, 21 Jan 2026 03:45:12 +0000
Subject: [PATCH 1/2] [clang][Sema] Add libc correctness warnings

Define as builtin and add correctness checks for:
* `open` / `open64` / `openat` / `openat64`
* `umask`
* `realpath`
---
 clang/include/clang/Basic/BuiltinHeaders.def  |   2 +
 clang/include/clang/Basic/Builtins.td         |  50 +++
 clang/include/clang/Basic/DiagnosticGroups.td |   3 +
 .../clang/Basic/DiagnosticSemaKinds.td        |  25 ++
 clang/lib/Sema/SemaChecking.cpp               | 294 ++++++++++++++++++
 clang/test/Analysis/unix-api.c                |   2 +-
 clang/test/Analysis/unix-api.cpp              |   2 +-
 clang/test/Analysis/unix-fns-o_creat.c        |   2 +-
 clang/test/Analysis/unix-fns.c                |   4 +-
 clang/test/Sema/enable_if.c                   |   4 +-
 clang/test/Sema/enum-attr.c                   |   2 +-
 clang/test/Sema/type-dependent-attrs.c        |   2 +-
 clang/test/Sema/warn-libc.c                   |  83 +++++
 clang/utils/TableGen/ClangBuiltinsEmitter.cpp |   1 +
 14 files changed, 467 insertions(+), 9 deletions(-)
 create mode 100644 clang/test/Sema/warn-libc.c

diff --git a/clang/include/clang/Basic/BuiltinHeaders.def 
b/clang/include/clang/Basic/BuiltinHeaders.def
index d6012a896eca9..985d98404555c 100644
--- a/clang/include/clang/Basic/BuiltinHeaders.def
+++ b/clang/include/clang/Basic/BuiltinHeaders.def
@@ -17,6 +17,7 @@ HEADER(BLOCKS_H, "Blocks.h")
 HEADER(COMPLEX_H, "complex.h")
 HEADER(CTYPE_H, "ctype.h")
 HEADER(EMMINTRIN_H, "emmintrin.h")
+HEADER(FCNTL_H, "fcntl.h")
 HEADER(FOUNDATION_NSOBJCRUNTIME_H, "Foundation/NSObjCRuntime.h")
 HEADER(IMMINTRIN_H, "immintrin.h")
 HEADER(INTRIN_H, "intrin.h")
@@ -37,6 +38,7 @@ HEADER(STDIO_H, "stdio.h")
 HEADER(STDLIB_H, "stdlib.h")
 HEADER(STRINGS_H, "strings.h")
 HEADER(STRING_H, "string.h")
+HEADER(SYS_STAT_H, "sys/stat.h")
 HEADER(UNISTD_H, "unistd.h")
 HEADER(UTILITY, "utility")
 HEADER(WCHAR_H, "wchar.h")
diff --git a/clang/include/clang/Basic/Builtins.td 
b/clang/include/clang/Basic/Builtins.td
index 9e00f3aa3020d..9d9863d36b703 100644
--- a/clang/include/clang/Basic/Builtins.td
+++ b/clang/include/clang/Basic/Builtins.td
@@ -3539,6 +3539,56 @@ def StrnCaseCmp : GNULibBuiltin<"strings.h"> {
   let RequiresUndef = 1;
 }
 
+// POSIX fcntl.h
+
+def Open : LibBuiltin<"fcntl.h"> {
+  let Spellings = ["open"];
+  let Attributes = [NoThrow];
+  let Prototype = "int(char const*, int, ...)";
+  let AddBuiltinPrefixedAlias = 1;
+}
+
+def Open64 : LibBuiltin<"fcntl.h"> {
+  let Spellings = ["open64"];
+  let Attributes = [NoThrow];
+  let Prototype = "int(char const*, int, ...)";
+  let AddBuiltinPrefixedAlias = 1;
+}
+
+def OpenAt : LibBuiltin<"fcntl.h"> {
+  let Spellings = ["openat"];
+  let Attributes = [NoThrow];
+  let Prototype = "int(int, char const*, int, ...)";
+  let AddBuiltinPrefixedAlias = 1;
+}
+
+def OpenAt64 : LibBuiltin<"fcntl.h"> {
+  let Spellings = ["openat64"];
+  let Attributes = [NoThrow];
+  let Prototype = "int(int, char const*, int, ...)";
+  let AddBuiltinPrefixedAlias = 1;
+}
+
+// POSIX stat.h
+
+def UMask : LibBuiltin<"sys/stat.h"> {
+  let Spellings = ["umask"];
+  let Attributes = [NoThrow];
+  let Prototype = "mode_t(mode_t)";
+  let AddBuiltinPrefixedAlias = 1;
+}
+
+// POSIX stdlib.h
+
+def RealPath : LibBuiltin<"stdlib.h"> {
+  let Spellings = ["realpath"];
+  let Attributes = [NoThrow, NonNull<NonOptimizing, [0]>];
+  let Prototype = "char*(char const* restrict, char* restrict)";
+  let AddBuiltinPrefixedAlias = 1;
+}
+
+// POSIX unistd.h
+
 def GNU_Exit : GNULibBuiltin<"unistd.h"> {
   let Spellings = ["_exit"];
   let Attributes = [NoReturn];
diff --git a/clang/include/clang/Basic/DiagnosticGroups.td 
b/clang/include/clang/Basic/DiagnosticGroups.td
index de1d1e13ea712..84ed2d7d49dd5 100644
--- a/clang/include/clang/Basic/DiagnosticGroups.td
+++ b/clang/include/clang/Basic/DiagnosticGroups.td
@@ -1802,3 +1802,6 @@ def TrivialAutoVarInit : 
DiagGroup<"trivial-auto-var-init">;
 
 // A warning for options that enable a feature that is not yet complete
 def ExperimentalOption : DiagGroup<"experimental-option">;
+
+// Warnings about incorrect libc usage
+def IncorrectLibcUse : DiagGroup<"incorrect-libc-use">;
diff --git a/clang/include/clang/Basic/DiagnosticSemaKinds.td 
b/clang/include/clang/Basic/DiagnosticSemaKinds.td
index a2be7ab3791b9..1e741037dbf64 100644
--- a/clang/include/clang/Basic/DiagnosticSemaKinds.td
+++ b/clang/include/clang/Basic/DiagnosticSemaKinds.td
@@ -1031,6 +1031,31 @@ def err_ptrauth_indirect_goto_addrlabel_arithmetic : 
Error<
   "%select{subtraction|addition}0 of address-of-label expressions is not "
   "supported with ptrauth indirect gotos">;
 
+// libc checks
+def warn_surplus_args : Warning<"too many arguments passed to '%0'; it expects 
"
+                                "a maximum of %1 variadic parameter">,
+                        InGroup<IncorrectLibcUse>;
+def warn_open_create_file_without_mode
+    : Warning<"nonzero 'mode' argument must be specified as the flag%s0 "
+              "'%1'%select{|| and '%2'}0 would result in file creation">,
+      InGroup<IncorrectLibcUse>;
+def warn_open_superfluous_mode
+    : Warning<"nonzero 'mode' argument was specified but is unnecessary for "
+              "specified 'flags'">,
+      InGroup<IncorrectLibcUse>;
+
+def warn_libc_invalid_mode_t : Warning<"invalid mode">,
+                               InGroup<IncorrectLibcUse>;
+
+def warn_path_max_overflow : Warning<"'%0' distination buffer needs to be "
+                                     "larger than than PATH_MAX bytes (%2),"
+                                     " but buffer is %1">,
+                             InGroup<IncorrectLibcUse>;
+
+def warn_pollfd_nfds : Warning<"the element count value '%0' is higher than "
+                               "the number of elements in the array '%1'">,
+                       InGroup<IncorrectLibcUse>;
+
 // __ptrauth qualifier
 def err_ptrauth_qualifier_invalid : Error<
   "%select{return type|parameter type|property}1 may not be qualified with "
diff --git a/clang/lib/Sema/SemaChecking.cpp b/clang/lib/Sema/SemaChecking.cpp
index 8e40364faf66c..683ba4efe6424 100644
--- a/clang/lib/Sema/SemaChecking.cpp
+++ b/clang/lib/Sema/SemaChecking.cpp
@@ -23,6 +23,7 @@
 #include "clang/AST/DeclCXX.h"
 #include "clang/AST/DeclObjC.h"
 #include "clang/AST/DeclarationName.h"
+#include "clang/AST/DynamicRecursiveASTVisitor.h"
 #include "clang/AST/EvaluatedExprVisitor.h"
 #include "clang/AST/Expr.h"
 #include "clang/AST/ExprCXX.h"
@@ -56,6 +57,8 @@
 #include "clang/Basic/TargetInfo.h"
 #include "clang/Basic/TypeTraits.h"
 #include "clang/Lex/Lexer.h" // TODO: Extract static functions to fix layering.
+#include "clang/Lex/MacroInfo.h"
+#include "clang/Lex/Preprocessor.h"
 #include "clang/Sema/Initialization.h"
 #include "clang/Sema/Lookup.h"
 #include "clang/Sema/Ownership.h"
@@ -1140,6 +1143,152 @@ static bool ProcessFormatStringLiteral(const Expr 
*FormatExpr,
   return false;
 }
 
+static std::optional<int> getPathMaxValue(const ASTContext &Ctx) {
+  if (Ctx.getTargetInfo().getTriple().isOSGlibc())
+    return {4096};
+
+  if (Ctx.getTargetInfo().getTriple().isOSDarwin())
+    return {1024};
+
+  return std::nullopt;
+}
+
+/* Follow simple references to other macros so we can match the Expr spelling 
*/
+static const MacroInfo *resolveMacroChainAtLoc(Preprocessor &PP,
+                                               const IdentifierInfo *MII,
+                                               SourceLocation Loc) {
+  auto *MI = PP.getMacroDefinitionAtLoc(MII, Loc).getMacroInfo();
+  if (!MI)
+    return nullptr;
+  const IdentifierInfo *MIIN = MII;
+  while (MI->getNumTokens() == 1 && MI->tokens_begin()->is(tok::identifier) &&
+         (MIIN = MI->tokens_begin()->getIdentifierInfo()) &&
+         MIIN->hasMacroDefinition()) {
+    MacroDefinition MacroDef = PP.getMacroDefinitionAtLoc(MIIN, Loc);
+    MI = MacroDef.getMacroInfo();
+  }
+  return MI;
+}
+
+// Search subexpressions for macros and attempt to evaluate them
+class MacroFlagMatcher : public ConstDynamicRecursiveASTVisitor {
+  const Sema &S;
+  llvm::SmallVectorImpl<std::pair<const IdentifierInfo *, const MacroInfo *>>
+      &Macros;
+  std::map<const IdentifierInfo *, int64_t> &FoundMacros;
+  std::optional<SourceRange> LastMatchRange;
+
+public:
+  MacroFlagMatcher(const Sema &S,
+                   llvm::SmallVectorImpl<std::pair<const IdentifierInfo *,
+                                                   const MacroInfo *>> &Macros,
+                   std::map<const IdentifierInfo *, int64_t> &FoundMacros)
+      : S(S), Macros(Macros), FoundMacros(FoundMacros) {}
+
+  bool VisitExpr(const Expr *E) override {
+    if (isa<ParenExpr>(E) || isa<IntegerLiteral>(E)) {
+      // Check location
+      SourceLocation SpLoc = S.SourceMgr.getSpellingLoc(E->getExprLoc());
+      for (auto *M = Macros.begin(); M != Macros.end();) {
+        if (M->second->tokens_begin()->getLocation() == SpLoc) {
+          LastMatchRange = E->getSourceRange();
+
+          Expr::EvalResult Result;
+          const Expr *SizeArg = E;
+          if (SizeArg->EvaluateAsInt(Result, S.getASTContext())) {
+            FoundMacros.insert(
+                std::pair(M->first, Result.Val.getInt().getExtValue()));
+            M = Macros.erase(M);
+            continue;
+          }
+        }
+        ++M;
+      }
+    }
+
+    return !Macros.empty();
+  }
+
+  bool dataTraverseStmtPre(const Stmt *Statement) override {
+    /* Ignore the contents of a matched macro */
+    return !(LastMatchRange &&
+             (*LastMatchRange).fullyContains(Statement->getSourceRange()));
+  }
+
+  static void getExpr(const Expr *E, ArrayRef<const IdentifierInfo *> Macros,
+                      const Sema &S,
+                      std::map<const IdentifierInfo *, int64_t> &FoundMacros) {
+    SmallVector<std::pair<const IdentifierInfo *, const MacroInfo *>, 2>
+        MacrosLoc;
+    for (auto &MII : Macros) {
+      auto *MI = resolveMacroChainAtLoc(S.PP, MII, E->getExprLoc());
+      if (MI)
+        MacrosLoc.push_back(std::make_pair(MII, MI));
+    }
+    MacroFlagMatcher Visitor(S, MacrosLoc, FoundMacros);
+    Visitor.TraverseStmt(E->getExprStmt());
+  }
+};
+
+static std::optional<int>
+evaluateSimpleMacroAtLocation(Preprocessor &PP, const IdentifierInfo *MacroII,
+                              SourceLocation Loc) {
+  auto *MI = resolveMacroChainAtLoc(PP, MacroII, Loc);
+  if (!MI)
+    return std::nullopt;
+
+  // Fast path for single digit integer
+  if (MI->getNumTokens() == 1) {
+    const Token &T = MI->tokens().back();
+    if (T.getLength() == 1 || T.getKind() == tok::binary_data) {
+      const uint8_t Val = PP.getSpellingOfSingleCharacterNumericConstant(T);
+      return llvm::APInt(8, Val, /*isSigned=*/true).getSExtValue();
+    }
+  }
+
+  // Filter out parens.
+  std::vector<Token> FilteredTokens;
+  FilteredTokens.reserve(MI->tokens().size());
+  for (auto &T : MI->tokens())
+    if (!T.isOneOf(tok::l_paren, tok::r_paren))
+      FilteredTokens.push_back(T);
+
+  // Parse an integer at the end of the macro definition.
+  const Token &T = FilteredTokens.back();
+
+  if (!T.isLiteral())
+    return std::nullopt;
+
+  bool InvalidSpelling = false;
+  SmallVector<char> Buffer(T.getLength());
+  // `Preprocessor::getSpelling` can get the spelling of the token regardless 
of
+  // whether the macro is defined in a PCH or not:
+  StringRef ValueStr = PP.getSpelling(T, Buffer, &InvalidSpelling);
+
+  if (InvalidSpelling)
+    return std::nullopt;
+
+  llvm::APSInt IntValue(/*BitWidth=*/0, /*isUnsigned=*/true);
+  constexpr unsigned AutoSenseRadix = 0;
+  if (ValueStr.getAsInteger(AutoSenseRadix,
+                            static_cast<llvm::APInt &>(IntValue)))
+    return std::nullopt;
+
+  // Parse an optional minus sign.
+  size_t Size = FilteredTokens.size();
+  if (Size >= 2) {
+    if (FilteredTokens[Size - 2].is(tok::minus)) {
+      // Make sure there's space for a sign bit
+      if (IntValue.isSignBitSet())
+        IntValue = IntValue.extend(IntValue.getBitWidth() + 1);
+      IntValue.setIsUnsigned(false);
+      IntValue = -IntValue;
+    }
+  }
+
+  return IntValue.getExtValue();
+}
+
 void Sema::checkFortifiedBuiltinMemoryFunction(FunctionDecl *FD,
                                                CallExpr *TheCall) {
   if (TheCall->isValueDependent() || TheCall->isTypeDependent() ||
@@ -1240,6 +1389,21 @@ void 
Sema::checkFortifiedBuiltinMemoryFunction(FunctionDecl *FD,
     return llvm::APSInt::getUnsigned(Result + 1).extOrTrunc(SizeTypeWidth);
   };
 
+  auto DiagnoseBigMode = [&](unsigned ModeArgIndex) {
+    const Expr *ModeArg = TheCall->getArg(ModeArgIndex);
+    Expr::EvalResult Result;
+    uint32_t Mode = 0;
+
+    if (ModeArg->EvaluateAsInt(Result, Context)) {
+      Mode = Result.Val.getInt().getExtValue();
+      if ((Mode & ~0777) > 0) {
+        DiagRuntimeBehavior(TheCall->getBeginLoc(), TheCall,
+                            PDiag(diag::warn_libc_invalid_mode_t)
+                                << ModeArg->getSourceRange());
+      }
+    }
+  };
+
   std::optional<llvm::APSInt> SourceSize;
   std::optional<llvm::APSInt> DestinationSize;
   unsigned DiagID = 0;
@@ -1450,6 +1614,136 @@ void 
Sema::checkFortifiedBuiltinMemoryFunction(FunctionDecl *FD,
       }
     }
     DestinationSize = ComputeSizeArgument(0);
+    break;
+  }
+
+  /* incorrect-libc-use start */
+  case Builtin::BIumask:
+  case Builtin::BI__builtin_umask: {
+    DiagnoseBigMode(0);
+    break;
+  }
+
+  case Builtin::BIopen:
+  case Builtin::BI__builtin_open:
+  case Builtin::BIopen64:
+  case Builtin::BI__builtin_open64:
+  case Builtin::BIopenat:
+  case Builtin::BI__builtin_openat:
+  case Builtin::BIopenat64:
+  case Builtin::BI__builtin_openat64: {
+    /* The param count is the index of the first variadic argument (mode) */
+    unsigned ModeIndex = UseDecl->getNumParams();
+    assert(TheCall->getNumArgs() >= ModeIndex);
+    unsigned NumVarArgs = TheCall->getNumArgs() - ModeIndex;
+
+    if (NumVarArgs > 1) {
+      DiagRuntimeBehavior(TheCall->getBeginLoc(), TheCall,
+                          PDiag(diag::warn_surplus_args)
+                              << GetFunctionName() << 1);
+    }
+
+    std::optional<int> Flags;
+    const Expr *FlagsArg = TheCall->getArg(ModeIndex - 1);
+    Expr::EvalResult Result;
+
+    const Expr *ModeArg = nullptr;
+
+    std::optional<int> Mode;
+
+    if (NumVarArgs >= 1) {
+      /* GNU libc accepts modes outside the 0777 range */
+      if (!Context.getTargetInfo().getTriple().isOSLinux())
+        DiagnoseBigMode(ModeIndex);
+
+      ModeArg = TheCall->getArg(ModeIndex);
+      Expr::EvalResult Result;
+      if (ModeArg->EvaluateAsInt(Result, Context)) {
+        Mode = Result.Val.getInt().getExtValue();
+      }
+    }
+
+    if (FlagsArg->EvaluateAsInt(Result, Context))
+      Flags = Result.Val.getInt().getExtValue();
+
+    if (!Flags)
+      break;
+
+    int64_t OCreatValue = 0;
+    int64_t OTmpFileValue = 0;
+
+    bool IsOCreat = false;
+    bool IsOTmpFile = false;
+    bool ExpectsMode = false;
+
+    const IdentifierInfo *OCreatII = PP.getIdentifierInfo("O_CREAT");
+    const IdentifierInfo *OTmpFileII = PP.getIdentifierInfo("O_TMPFILE");
+
+    OCreatValue =
+        evaluateSimpleMacroAtLocation(PP, OCreatII, FlagsArg->getExprLoc())
+            .value_or(0);
+    OTmpFileValue =
+        evaluateSimpleMacroAtLocation(PP, OTmpFileII, FlagsArg->getExprLoc())
+            .value_or(0);
+
+    // Fallback to searching the argument for an expression to evaluate
+    if (!OCreatValue || !OTmpFileValue) {
+      std::map<const IdentifierInfo *, int64_t> FoundMacros;
+      MacroFlagMatcher::getExpr(FlagsArg, {OCreatII, OTmpFileII}, *this,
+                                FoundMacros);
+
+      auto OCreatIt = FoundMacros.find(OCreatII);
+      auto OTmpFileIt = FoundMacros.find(OTmpFileII);
+
+      if (OCreatIt != FoundMacros.end())
+        OCreatValue = OCreatIt->second;
+      if (OTmpFileIt != FoundMacros.end())
+        OTmpFileValue = OTmpFileIt->second;
+    }
+
+    IsOCreat = OCreatValue && (*Flags & OCreatValue) == OCreatValue;
+    IsOTmpFile = OTmpFileValue && (*Flags & OTmpFileValue) == OTmpFileValue;
+    ExpectsMode = IsOCreat || IsOTmpFile;
+
+    /* check if mode should be present for flags */
+
+    // If we failed to evaluate the flags don't diagnose
+    if (!OCreatValue && !OTmpFileValue)
+      break;
+
+    if (ExpectsMode && !Mode) {
+      int Count = 0;
+      if (IsOCreat)
+        Count++;
+      if (IsOTmpFile)
+        Count++;
+      auto D = PDiag(diag::warn_open_create_file_without_mode)
+               << FlagsArg->getSourceRange() << Count;
+      if (IsOCreat)
+        D << OCreatII->getName();
+      if (IsOTmpFile)
+        D << OTmpFileII->getName();
+      DiagRuntimeBehavior(TheCall->getBeginLoc(), TheCall, D);
+    } else if (!ExpectsMode && (Mode && *Mode != 0)) {
+      DiagRuntimeBehavior(TheCall->getBeginLoc(), TheCall,
+                          PDiag(diag::warn_open_superfluous_mode)
+                              << FlagsArg->getSourceRange()
+                              << ModeArg->getSourceRange());
+    }
+    break;
+  }
+
+  case Builtin::BIrealpath:
+  case Builtin::BI__builtin_realpath: {
+    DiagID = diag::warn_fortify_source_overflow;
+    std::optional<int> PathMax = getPathMaxValue(Context);
+    DiagID = diag::warn_path_max_overflow;
+    if (PathMax)
+      SourceSize =
+          llvm::APSInt::getUnsigned(*PathMax).extOrTrunc(SizeTypeWidth);
+    DestinationSize = ComputeSizeArgument(TheCall->getNumArgs() - 1);
+
+    break;
   }
   }
 
diff --git a/clang/test/Analysis/unix-api.c b/clang/test/Analysis/unix-api.c
index 64ff3c0fccf42..4361e1c6481c6 100644
--- a/clang/test/Analysis/unix-api.c
+++ b/clang/test/Analysis/unix-api.c
@@ -1,4 +1,4 @@
-// RUN: %clang_analyze_cc1 -analyzer-checker=core,unix.API -verify %s
+// RUN: %clang_analyze_cc1 -analyzer-checker=core,unix.API 
-Wno-incorrect-libc-use -verify %s
 
 #ifndef O_RDONLY
 #define O_RDONLY 0
diff --git a/clang/test/Analysis/unix-api.cpp b/clang/test/Analysis/unix-api.cpp
index 2b07d8807c1f2..36f495cdd0654 100644
--- a/clang/test/Analysis/unix-api.cpp
+++ b/clang/test/Analysis/unix-api.cpp
@@ -1,4 +1,4 @@
-// RUN: %clang_analyze_cc1 -analyzer-checker=core,unix.API -verify %s
+// RUN: %clang_analyze_cc1 -analyzer-checker=core,unix.API 
-Wno-incorrect-libc-use -verify %s
 extern "C" {
 #ifndef O_RDONLY
 #define O_RDONLY 0
diff --git a/clang/test/Analysis/unix-fns-o_creat.c 
b/clang/test/Analysis/unix-fns-o_creat.c
index 76df3851cfc9c..0d17bb43c74f5 100644
--- a/clang/test/Analysis/unix-fns-o_creat.c
+++ b/clang/test/Analysis/unix-fns-o_creat.c
@@ -1,4 +1,4 @@
-// RUN: %clang_analyze_cc1 -verify -analyzer-checker=core,unix.API 
-analyzer-output=text %s
+// RUN: %clang_analyze_cc1 -verify -analyzer-checker=core,unix.API 
-Wno-incorrect-libc-use -analyzer-output=text %s
 
 // Verify that the UnixAPIChecker finds the missing mode value regardless
 // of the particular values of these macros, particularly O_CREAT.
diff --git a/clang/test/Analysis/unix-fns.c b/clang/test/Analysis/unix-fns.c
index 77894285bcb69..2356d1480aaac 100644
--- a/clang/test/Analysis/unix-fns.c
+++ b/clang/test/Analysis/unix-fns.c
@@ -1,6 +1,6 @@
-// RUN: %clang_analyze_cc1 -triple x86_64-apple-darwin10 
-analyzer-checker=core,unix.API,osx.API,optin.portability %s 
-analyzer-output=plist -analyzer-config faux-bodies=true  -fblocks -verify -o 
%t.plist
+// RUN: %clang_analyze_cc1 -triple x86_64-apple-darwin10 
-analyzer-checker=core,unix.API,osx.API,optin.portability %s 
-analyzer-output=plist -analyzer-config faux-bodies=true  -fblocks 
-Wno-incorrect-libc-use -verify -o %t.plist
 // RUN: %normalize_plist <%t.plist | diff -ub 
%S/Inputs/expected-plists/unix-fns.c.plist -
-// RUN: %clang_analyze_cc1 -triple x86_64-unknown-linux 
-analyzer-checker=core,unix.API,osx.API,optin.portability %s 
-analyzer-output=plist -analyzer-config faux-bodies=true  -fblocks -verify -o 
%t.plist
+// RUN: %clang_analyze_cc1 -triple x86_64-unknown-linux 
-analyzer-checker=core,unix.API,osx.API,optin.portability %s 
-analyzer-output=plist -analyzer-config faux-bodies=true  -fblocks 
-Wno-incorrect-libc-use -verify -o %t.plist
 // RUN: %normalize_plist <%t.plist | diff -ub 
%S/Inputs/expected-plists/unix-fns.c.plist -
 // RUN: mkdir -p %t.dir
 // RUN: %clang_analyze_cc1 
-analyzer-checker=core,unix.API,osx.API,optin.portability -analyzer-output=html 
-analyzer-config faux-bodies=true -fblocks -o %t.dir %s
diff --git a/clang/test/Sema/enable_if.c b/clang/test/Sema/enable_if.c
index 3ef8310a2fef7..a8e8d96b74503 100644
--- a/clang/test/Sema/enable_if.c
+++ b/clang/test/Sema/enable_if.c
@@ -1,5 +1,5 @@
-// RUN: %clang_cc1 %s -verify
-// RUN: %clang_cc1 %s -DCODEGEN -emit-llvm -o - | FileCheck %s
+// RUN: %clang_cc1 -fno-builtin-open %s -verify
+// RUN: %clang_cc1 -fno-builtin-open %s -DCODEGEN -emit-llvm -o - | FileCheck 
%s
 
 #define O_CREAT 0x100
 typedef int mode_t;
diff --git a/clang/test/Sema/enum-attr.c b/clang/test/Sema/enum-attr.c
index 1cdd7028e03e0..5a439f51d5912 100644
--- a/clang/test/Sema/enum-attr.c
+++ b/clang/test/Sema/enum-attr.c
@@ -1,4 +1,4 @@
-// RUN: %clang_cc1 -fsyntax-only -verify -Wassign-enum -Wswitch-enum 
-Wcovered-switch-default %s
+// RUN: %clang_cc1 -fno-builtin-open -fsyntax-only -verify -Wassign-enum 
-Wswitch-enum -Wcovered-switch-default %s
 
 enum Enum {
   A0 = 1, A1 = 10
diff --git a/clang/test/Sema/type-dependent-attrs.c 
b/clang/test/Sema/type-dependent-attrs.c
index 13068b3f94ad4..915190d07adf0 100644
--- a/clang/test/Sema/type-dependent-attrs.c
+++ b/clang/test/Sema/type-dependent-attrs.c
@@ -1,4 +1,4 @@
-// RUN: %clang_cc1 -std=c23 -fsyntax-only -verify %s
+// RUN: %clang_cc1 -fno-builtin-open -std=c23 -fsyntax-only -verify %s
 
 int open() { return 0; }
 void close(typeof(open()) *) {}
diff --git a/clang/test/Sema/warn-libc.c b/clang/test/Sema/warn-libc.c
new file mode 100644
index 0000000000000..607cf4a75e891
--- /dev/null
+++ b/clang/test/Sema/warn-libc.c
@@ -0,0 +1,83 @@
+// RUN: %clang_cc1 -triple x86_64-apple-macosx10.14.0 %s -verify
+// RUN: %clang_cc1 -triple x86_64-linux %s -verify
+// RUN: %clang_cc1 -triple x86_64-linux %s -verify -DO_CREAT='(32 | __linux__)'
+
+
+#define FAUX_CREATE 0100
+#if O_CREAT != FAUX_CREATE
+void call_open_no_creat(void) {
+  __builtin_open("name", FAUX_CREATE, 0777);
+  __builtin_open("name", FAUX_CREATE);
+}
+#endif
+
+#define O_RDONLY 0
+#define O_WRONLY 01
+#define O_RDWR   02
+#ifndef O_CREAT
+#define O_CREAT 0100
+#endif
+#define __O_DIRECTORY  0x10000
+#define __O_TMPFILE   (020000000 | __O_DIRECTORY)
+#define O_TMPFILE  __O_TMPFILE /* Atomically create nameless file.  */
+
+void call_open(void) {
+#if O_CREAT == 64
+  __builtin_open("name", 64); // expected-warning {{nonzero 'mode' argument 
must be specified as the flag 'O_CREAT' would result in file creation}}
+#endif
+  __builtin_open("name", O_TMPFILE | O_RDONLY); // expected-warning {{nonzero 
'mode' argument must be specified as the flag 'O_TMPFILE' would result in file 
creation}}
+  __builtin_open("name", O_TMPFILE + O_RDONLY); // expected-warning {{nonzero 
'mode' argument must be specified as the flag 'O_TMPFILE' would result in file 
creation}}
+  __builtin_open("name", O_TMPFILE); // expected-warning {{nonzero 'mode' 
argument must be specified as the flag 'O_TMPFILE' would result in file 
creation}}
+  __builtin_open("name", O_CREAT); // expected-warning {{nonzero 'mode' 
argument must be specified as the flag 'O_CREAT' would result in file creation}}
+  __builtin_open("name", O_CREAT | O_TMPFILE); // expected-warning {{nonzero 
'mode' argument must be specified as the flags 'O_CREAT' and 'O_TMPFILE' would 
result in file creation}}
+  __builtin_open("name", O_CREAT | O_TMPFILE, 0777);
+  __builtin_open("name", O_CREAT | O_TMPFILE, 0777, 0); // expected-warning 
{{too many arguments passed to 'open'; it expects a maximum of 1 variadic 
parameter}}
+  __builtin_open("name", O_CREAT | O_TMPFILE, 0777, 0, 0); // expected-warning 
{{too many arguments passed to 'open'; it expects a maximum of 1 variadic 
parameter}}
+}
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+typedef unsigned int uint32_t;
+typedef uint32_t mode_t;
+
+mode_t umode(mode_t);
+int open(const char *pathname, int flags, ... /* mode_t mode */ );
+int open64(const char *pathname, int flags, ... /* mode_t mode */ );
+int openat(int fddir, const char *pathname, int flags, ... /* mode_t mode */ );
+int openat64(int fddir, const char *pathname, int flags, ... /* mode_t mode */ 
);
+
+#ifdef __cplusplus
+}
+#endif
+
+void call_openat(void) {
+  __builtin_openat(0, "name", O_CREAT, 0777);
+  __builtin_openat(0, "name", O_CREAT, 01000);
+#if !defined(__linux__)
+  // expected-warning@-2{{invalid mode}}
+#endif
+}
+
+void call_umask(void) {
+  __builtin_umask(0);
+  __builtin_umask(0777);
+  __builtin_umask(01000); // expected-warning {{invalid mode}}
+}
+
+#if defined(__APPLE__)
+#define PATH_MAX 1024
+#elif defined(__linux__)
+#define PATH_MAX 4096
+#endif
+
+void call_realpath() {
+  char too_small[PATH_MAX - 1];
+  char too_big[PATH_MAX + 1];
+  char too_just_right[PATH_MAX];
+
+  __builtin_realpath("hah", too_small); // expected-warning-re {{'realpath' 
distination buffer needs to be larger than than PATH_MAX bytes ({{[0-9]+}}), 
but buffer is {{[0-9]+}}}}
+  __builtin_realpath("hah", too_big);
+  __builtin_realpath("hah", too_just_right);
+}
diff --git a/clang/utils/TableGen/ClangBuiltinsEmitter.cpp 
b/clang/utils/TableGen/ClangBuiltinsEmitter.cpp
index fb089a811ef92..fb5829ced5f55 100644
--- a/clang/utils/TableGen/ClangBuiltinsEmitter.cpp
+++ b/clang/utils/TableGen/ClangBuiltinsEmitter.cpp
@@ -356,6 +356,7 @@ class PrototypeParser {
                                .Case("int32_t", "Zi")
                                .Case("int64_t", "Wi")
                                .Case("jmp_buf", "J")
+                               .Case("mode_t", "UZi")
                                .Case("msint32_t", "Ni")
                                .Case("msuint32_t", "UNi")
                                .Case("objc_super", "M")

>From 6d641fe9842542cd2d94210bdd278ba77559912e Mon Sep 17 00:00:00 2001
From: Colin Kinloch <[email protected]>
Date: Wed, 21 Jan 2026 03:49:17 +0000
Subject: [PATCH 2/2] [clang][Sema] Add bounds checking for libc poll

Manually identify a call to the libc `poll` function and verify that the
fd count argument isn't greater than the number of elements in the fds
array.
---
 clang/lib/Sema/SemaChecking.cpp | 126 ++++++++++++++++++++++++++++++++
 clang/test/Sema/warn-libc.c     |  42 +++++++++++
 2 files changed, 168 insertions(+)

diff --git a/clang/lib/Sema/SemaChecking.cpp b/clang/lib/Sema/SemaChecking.cpp
index 683ba4efe6424..5b7934d0e0def 100644
--- a/clang/lib/Sema/SemaChecking.cpp
+++ b/clang/lib/Sema/SemaChecking.cpp
@@ -4563,6 +4563,125 @@ void Sema::CheckConstructorCall(FunctionDecl *FDecl, 
QualType ThisType,
             Loc, SourceRange(), CallType);
 }
 
+static std::optional<llvm::APSInt> GetArrayElementCount(Sema &S,
+                                                        const Expr *BaseExpr) {
+  const Type *EffectiveType =
+      BaseExpr->getType()->getPointeeOrArrayElementType();
+  if (EffectiveType->isDependentType())
+    return {};
+
+  BaseExpr = BaseExpr->IgnoreParenCasts();
+  const ConstantArrayType *ArrayTy =
+      S.Context.getAsConstantArrayType(BaseExpr->getType());
+
+  if (!ArrayTy)
+    return {};
+
+  const Type *BaseType = ArrayTy->getElementType().getTypePtr();
+
+  if (BaseType->isDependentType() || BaseType->isIncompleteType())
+    return {};
+
+  LangOptions::StrictFlexArraysLevelKind StrictFlexArraysLevel =
+      S.getLangOpts().getStrictFlexArraysLevel();
+
+  if (BaseExpr->isFlexibleArrayMemberLike(
+          S.Context, StrictFlexArraysLevel,
+          /*IgnoreTemplateOrMacroSubstitution=*/true))
+    return {};
+
+  llvm::APInt ArrayTySize = ArrayTy->getSize();
+  if (BaseType != EffectiveType) {
+    // Make sure we're comparing apples to apples when comparing index to
+    // size.
+    uint64_t ptrarith_typesize = S.Context.getTypeSize(EffectiveType);
+    uint64_t array_typesize = S.Context.getTypeSize(BaseType);
+
+    // Handle ptrarith_typesize being zero, such as when casting to void*.
+    // Use the size in bits (what "getTypeSize()" returns) rather than bytes.
+    if (!ptrarith_typesize)
+      ptrarith_typesize = S.Context.getCharWidth();
+
+    if (ptrarith_typesize != array_typesize) {
+      // There's a cast to a different size type involved.
+      uint64_t ratio = array_typesize / ptrarith_typesize;
+
+      // TODO: Be smarter about handling cases where array_typesize is not a
+      // multiple of ptrarith_typesize.
+      if (ptrarith_typesize * ratio == array_typesize)
+        ArrayTySize *= llvm::APInt(ArrayTySize.getBitWidth(), ratio);
+    }
+  }
+
+  return llvm::APSInt(std::move(ArrayTySize));
+}
+
+static bool CheckLibcPoll(Sema &S, FunctionDecl *FDecl, CallExpr *TheCall) {
+  // Check that the function resembles libc poll
+  if (!S.getSourceManager().isInSystemHeader(FDecl->getLocation()))
+    return false;
+
+  if (TheCall->getNumArgs() != 3)
+    return false;
+
+  if (!FDecl->getReturnType()->isSignedIntegerType())
+    return false;
+
+  const IdentifierTable::iterator It = S.Context.Idents.find("pollfd");
+
+  // If we can't find pollfd cancel the check
+  if (It == S.Context.Idents.end())
+    return false;
+
+  const IdentifierInfo *II = It->second;
+
+  Expr *FdsArg = TheCall->getArg(0);
+  QualType FdsType = FdsArg->getType();
+
+  if (!FdsType->isPointerOrReferenceType() && !FdsType->isArrayType())
+    return false;
+
+  const Type *elType = FdsType->getPointeeOrArrayElementType();
+
+  if (!elType->isRecordType())
+    return false;
+
+  const RecordDecl *RD = elType->getAsRecordDecl();
+  if (II != RD->getIdentifier())
+    return false;
+
+  // Check size type
+  Expr *NfdsArg = TheCall->getArg(1);
+  auto &ExpectedNfdsType = S.Context.UnsignedLongTy;
+  if (S.Context.getTargetInfo().getTriple().isOSDarwin())
+    ExpectedNfdsType = S.Context.UnsignedIntTy;
+
+  if (!S.Context.hasSameType(NfdsArg->getType().getUnqualifiedType(),
+                             ExpectedNfdsType))
+    return false;
+
+  Expr::EvalResult Result;
+  if (!NfdsArg->EvaluateAsInt(Result, S.getASTContext()))
+    return false;
+  llvm::APSInt NfdsValue = Result.Val.getInt();
+  NfdsValue.setIsUnsigned(true);
+
+  std::optional<llvm::APSInt> FdsElCount = GetArrayElementCount(S, FdsArg);
+
+  if (FdsElCount) {
+    if (llvm::APSInt::compareValues(NfdsValue, *FdsElCount) > 0) {
+      SmallString<16> FdsElCountStr;
+      SmallString<16> NfdsValueStr;
+      FdsElCount->toString(FdsElCountStr, /*Radix=*/10);
+      NfdsValue.toString(NfdsValueStr, /*Radix=*/10);
+      S.Diag(TheCall->getBeginLoc(), diag::warn_pollfd_nfds)
+          << NfdsValueStr << FdsElCountStr;
+    }
+  }
+
+  return true;
+}
+
 bool Sema::CheckFunctionCall(FunctionDecl *FDecl, CallExpr *TheCall,
                              const FunctionProtoType *Proto) {
   bool IsMemberOperatorCall = isa<CXXOperatorCallExpr>(TheCall) &&
@@ -4621,6 +4740,13 @@ bool Sema::CheckFunctionCall(FunctionDecl *FDecl, 
CallExpr *TheCall,
   CheckMaxUnsignedZero(TheCall, FDecl);
   CheckInfNaNFunction(TheCall, FDecl);
 
+  if (FDecl->isExternC()) {
+    const IdentifierInfo *II = FDecl->getIdentifier();
+    if (II->isStr("poll")) {
+      CheckLibcPoll(*this, FDecl, TheCall);
+    }
+  }
+
   if (getLangOpts().ObjC)
     ObjC().DiagnoseCStringFormatDirectiveInCFAPI(FDecl, Args, NumArgs);
 
diff --git a/clang/test/Sema/warn-libc.c b/clang/test/Sema/warn-libc.c
index 607cf4a75e891..77ee9505d764e 100644
--- a/clang/test/Sema/warn-libc.c
+++ b/clang/test/Sema/warn-libc.c
@@ -81,3 +81,45 @@ void call_realpath() {
   __builtin_realpath("hah", too_big);
   __builtin_realpath("hah", too_just_right);
 }
+
+# 1 "poll.h" 1 3
+# 1 "sys/poll.h" 1 3
+
+#if defined(__APPLE__)
+typedef unsigned int nfds_t;
+#elif defined(__linux__)
+typedef unsigned long int nfds_t;
+#endif
+
+struct pollfd {
+  int fd;
+  short events;
+  short revents;
+};
+extern int poll (struct pollfd *__fds, nfds_t __nfds, int __timeout);
+
+# 2 "poll_test.c" 2
+
+#define __builtin_poll poll
+
+void call_poll(void) {
+  struct pollfd fds[] = {
+    {0, 0, 0},
+    {0, 0, 0},
+    {0, 0, 0},
+    {0, 0, 0},
+    {0, 0, 0},
+    {0, 0, 0},
+    {0, 0, 0},
+    {0, 0, 0},
+    {0, 0, 0},
+  };
+  const nfds_t nfds = sizeof(fds) / sizeof(*fds);
+  __builtin_poll(fds, nfds, 0);
+  __builtin_poll(fds, nfds + 1, 0); // expected-warning {{the element count 
value '10' is higher than the number of elements in the array '9'}}
+  __builtin_poll(fds, nfds - 1, 0);
+  /* Unhandled errors */
+  __builtin_poll(&fds[1], nfds, 0);
+  __builtin_poll(fds + 1, nfds, 0);
+  __builtin_poll(fds - 1, nfds, 0);
+}

_______________________________________________
cfe-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to