https://github.com/ziqingluo-90 created 
https://github.com/llvm/llvm-project/pull/173096

This commit adds support for functions annotated with 
`__attribute__((__format__(__printf__, ...)))` (or `__scanf__`). These 
functions will be treated the same way as printf/scanf functions in the 
standard C library by `-Wunsafe-buffer-usage`

rdar://143233737

>From 2722518f0d0e41a81bef29cd739f37f640aa57b7 Mon Sep 17 00:00:00 2001
From: Ziqing Luo <[email protected]>
Date: Fri, 19 Dec 2025 12:15:12 -0800
Subject: [PATCH] [-Wunsafe-buffer-usage] Add check for custom printf/scanf
 functions

This commit adds the support for functions annotated with
`__attribute__((__format__(__printf__, ...)))` (or `__scanf__`).
These functions will be treated the same way as printf/scanf functions
in the standard C library by `-Wunsafe-buffer-usage`

rdar://143233737
---
 .../Analyses/UnsafeBufferUsageGadgets.def     |   1 +
 clang/lib/Analysis/UnsafeBufferUsage.cpp      | 115 +++++++++++++++---
 ...arn-unsafe-buffer-usage-libc-functions.cpp |  32 ++++-
 3 files changed, 129 insertions(+), 19 deletions(-)

diff --git a/clang/include/clang/Analysis/Analyses/UnsafeBufferUsageGadgets.def 
b/clang/include/clang/Analysis/Analyses/UnsafeBufferUsageGadgets.def
index fae5f8b8aa8e3..f9bba5d54e9c7 100644
--- a/clang/include/clang/Analysis/Analyses/UnsafeBufferUsageGadgets.def
+++ b/clang/include/clang/Analysis/Analyses/UnsafeBufferUsageGadgets.def
@@ -40,6 +40,7 @@ WARNING_GADGET(UnsafeBufferUsageCtorAttr)
 WARNING_GADGET(DataInvocation)
 WARNING_GADGET(UniquePtrArrayAccess)
 WARNING_OPTIONAL_GADGET(UnsafeLibcFunctionCall)
+WARNING_OPTIONAL_GADGET(UnsafeFormatAttributedFunctionCall)
 WARNING_OPTIONAL_GADGET(SpanTwoParamConstructor) // Uses of `std::span(arg0, 
arg1)`
 FIXABLE_GADGET(ULCArraySubscript)          // `DRE[any]` in an Unspecified 
Lvalue Context
 FIXABLE_GADGET(DerefSimplePtrArithFixable)
diff --git a/clang/lib/Analysis/UnsafeBufferUsage.cpp 
b/clang/lib/Analysis/UnsafeBufferUsage.cpp
index 7ef20726d0ab9..7c21ec86af544 100644
--- a/clang/lib/Analysis/UnsafeBufferUsage.cpp
+++ b/clang/lib/Analysis/UnsafeBufferUsage.cpp
@@ -825,9 +825,11 @@ struct LibcFunNamePrefixSuffixParser {
 //
 // `UnsafeArg` is the output argument that will be set only if this function
 // returns true.
-static bool hasUnsafeFormatOrSArg(const CallExpr *Call, const Expr *&UnsafeArg,
-                                  const unsigned FmtArgIdx, ASTContext &Ctx,
-                                  bool isKprintf = false) {
+// `FmtArgIdx` is insignificant if its value is negative, meaning that format
+// arguments start at `FmtIdx` + 1.
+static bool hasUnsafeFormatOrSArg(ASTContext &Ctx, const CallExpr *Call,
+                                  const Expr *&UnsafeArg, const unsigned 
FmtIdx,
+                                  int FmtArgIdx = -1, bool isKprintf = false) {
   class StringFormatStringHandler
       : public analyze_format_string::FormatStringHandler {
     const CallExpr *Call;
@@ -850,8 +852,8 @@ static bool hasUnsafeFormatOrSArg(const CallExpr *Call, 
const Expr *&UnsafeArg,
       unsigned PArgIdx = -1;
 
       if (Precision.hasDataArgument())
-        PArgIdx = Precision.getPositionalArgIndex() + FmtArgIdx;
-      if (0 < PArgIdx && PArgIdx < Call->getNumArgs()) {
+        PArgIdx = Precision.getArgIndex() + FmtArgIdx;
+      if (0 <= PArgIdx && PArgIdx < Call->getNumArgs()) {
         const Expr *PArg = Call->getArg(PArgIdx);
 
         // Strip the cast if `PArg` is a cast-to-int expression:
@@ -886,9 +888,9 @@ static bool hasUnsafeFormatOrSArg(const CallExpr *Call, 
const Expr *&UnsafeArg,
           analyze_printf::PrintfConversionSpecifier::sArg)
         return true; // continue parsing
 
-      unsigned ArgIdx = FS.getPositionalArgIndex() + FmtArgIdx;
+      unsigned ArgIdx = FS.getArgIndex() + FmtArgIdx;
 
-      if (!(0 < ArgIdx && ArgIdx < Call->getNumArgs()))
+      if (!(0 <= ArgIdx && ArgIdx < Call->getNumArgs()))
         // If the `ArgIdx` is invalid, give up.
         return true; // continue parsing
 
@@ -921,12 +923,15 @@ static bool hasUnsafeFormatOrSArg(const CallExpr *Call, 
const Expr *&UnsafeArg,
     bool isUnsafeArgSet() { return UnsafeArgSet; }
   };
 
-  const Expr *Fmt = Call->getArg(FmtArgIdx);
+  const Expr *Fmt = Call->getArg(FmtIdx);
+  unsigned FmtArgStartingIdx =
+      FmtArgIdx < 0 ? FmtIdx + 1 : static_cast<unsigned>(FmtArgIdx);
 
   if (auto *SL = dyn_cast<clang::StringLiteral>(Fmt->IgnoreParenImpCasts())) {
     if (SL->getCharByteWidth() == 1) {
       StringRef FmtStr = SL->getString();
-      StringFormatStringHandler Handler(Call, FmtArgIdx, UnsafeArg, Ctx);
+      StringFormatStringHandler Handler(Call, FmtArgStartingIdx, UnsafeArg,
+                                        Ctx);
 
       return analyze_format_string::ParsePrintfString(
                  Handler, FmtStr.begin(), FmtStr.end(), Ctx.getLangOpts(),
@@ -935,7 +940,8 @@ static bool hasUnsafeFormatOrSArg(const CallExpr *Call, 
const Expr *&UnsafeArg,
     }
 
     if (auto FmtStr = SL->tryEvaluateString(Ctx)) {
-      StringFormatStringHandler Handler(Call, FmtArgIdx, UnsafeArg, Ctx);
+      StringFormatStringHandler Handler(Call, FmtArgStartingIdx, UnsafeArg,
+                                        Ctx);
       return analyze_format_string::ParsePrintfString(
                  Handler, FmtStr->data(), FmtStr->data() + FmtStr->size(),
                  Ctx.getLangOpts(), Ctx.getTargetInfo(), isKprintf) &&
@@ -946,7 +952,7 @@ static bool hasUnsafeFormatOrSArg(const CallExpr *Call, 
const Expr *&UnsafeArg,
   // In this case, this call is considered unsafe if at least one argument
   // (including the format argument) is unsafe pointer.
   return llvm::any_of(
-      llvm::make_range(Call->arg_begin() + FmtArgIdx, Call->arg_end()),
+      llvm::make_range(Call->arg_begin() + FmtArgStartingIdx, Call->arg_end()),
       [&UnsafeArg, &Ctx](const Expr *Arg) -> bool {
         if (Arg->getType()->isPointerType() && !isNullTermPointer(Arg, Ctx)) {
           UnsafeArg = Arg;
@@ -1161,7 +1167,7 @@ static bool hasUnsafePrintfStringArg(const CallExpr 
&Node, ASTContext &Ctx,
     // It is a fprintf:
     const Expr *UnsafeArg;
 
-    if (hasUnsafeFormatOrSArg(&Node, UnsafeArg, 1, Ctx, false)) {
+    if (hasUnsafeFormatOrSArg(Ctx, &Node, UnsafeArg, 1)) {
       Result.addNode(Tag, DynTypedNode::create(*UnsafeArg));
       return true;
     }
@@ -1175,7 +1181,7 @@ static bool hasUnsafePrintfStringArg(const CallExpr 
&Node, ASTContext &Ctx,
 
     if (auto *II = FD->getIdentifier())
       isKprintf = II->getName() == "kprintf";
-    if (hasUnsafeFormatOrSArg(&Node, UnsafeArg, 0, Ctx, isKprintf)) {
+    if (hasUnsafeFormatOrSArg(Ctx, &Node, UnsafeArg, 0, -1, isKprintf)) {
       Result.addNode(Tag, DynTypedNode::create(*UnsafeArg));
       return true;
     }
@@ -1190,7 +1196,7 @@ static bool hasUnsafePrintfStringArg(const CallExpr 
&Node, ASTContext &Ctx,
       // second is an integer, it is a snprintf:
       const Expr *UnsafeArg;
 
-      if (hasUnsafeFormatOrSArg(&Node, UnsafeArg, 2, Ctx, false)) {
+      if (hasUnsafeFormatOrSArg(Ctx, &Node, UnsafeArg, 2)) {
         Result.addNode(Tag, DynTypedNode::create(*UnsafeArg));
         return true;
       }
@@ -2068,6 +2074,7 @@ class UnsafeLibcFunctionCallGadget : public WarningGadget 
{
   constexpr static const char *const UnsafeVaListTag =
       "UnsafeLibcFunctionCall_va_list";
 
+public:
   enum UnsafeKind {
     OTHERS = 0,  // no specific information, the callee function is unsafe
     SPRINTF = 1, // never call `-sprintf`s, call `-snprintf`s instead.
@@ -2080,7 +2087,6 @@ class UnsafeLibcFunctionCallGadget : public WarningGadget 
{
                  // considered unsafe as it is not compile-time check
   } WarnedFunKind = OTHERS;
 
-public:
   UnsafeLibcFunctionCallGadget(const MatchResult &Result)
       : WarningGadget(Kind::UnsafeLibcFunctionCall),
         Call(Result.getNodeAs<CallExpr>(Tag)) {
@@ -2171,6 +2177,85 @@ class UnsafeLibcFunctionCallGadget : public 
WarningGadget {
   SmallVector<const Expr *, 1> getUnsafePtrs() const override { return {}; }
 };
 
+class UnsafeFormatAttributedFunctionCallGadget : public WarningGadget {
+  const CallExpr *const Call;
+  const Expr *UnsafeArg = nullptr;
+  constexpr static const char *const Tag = 
"UnsafeFormatAttributedFunctionCall";
+  constexpr static const char *const UnsafeStringTag =
+      "UnsafeFormatAttributedFunctionCall_string";
+
+public:
+  UnsafeFormatAttributedFunctionCallGadget(const MatchResult &Result)
+      : WarningGadget(Kind::UnsafeLibcFunctionCall),
+        Call(Result.getNodeAs<CallExpr>(Tag)),
+        UnsafeArg(Result.getNodeAs<Expr>(UnsafeStringTag)) {}
+
+  static bool matches(const Stmt *S, ASTContext &Ctx,
+                      const UnsafeBufferUsageHandler *Handler,
+                      MatchResult &Result) {
+    if (ignoreUnsafeLibcCall(Ctx, *S, Handler))
+      return false;
+    auto *CE = dyn_cast<CallExpr>(S);
+    if (!CE || !CE->getDirectCallee())
+      return false;
+    const auto *FD = dyn_cast<FunctionDecl>(CE->getDirectCallee());
+    if (!FD)
+      return false;
+
+    const FormatAttr *Attr = nullptr;
+    bool IsPrintf = false;
+    bool AnyAttr = llvm::any_of(
+        FD->specific_attrs<FormatAttr>(),
+        [&Attr, &IsPrintf](const FormatAttr *FA) -> bool {
+          if (const auto *II = FA->getType()) {
+            if (II->getName() == "printf" || II->getName() == "scanf") {
+              Attr = FA;
+              IsPrintf = II->getName() == "printf";
+              return true;
+            }
+          }
+          return false;
+        });
+    const Expr *UnsafeArg;
+
+    if (AnyAttr && !IsPrintf &&
+        (CE->getNumArgs() >= static_cast<unsigned>(Attr->getFirstArg()))) {
+      // for scanf-like functions, any format argument is considered unsafe:
+      Result.addNode(Tag, DynTypedNode::create(*CE));
+      return true;
+    }
+    if (AnyAttr && libc_func_matchers::hasUnsafeFormatOrSArg(
+                       Ctx, CE, UnsafeArg,
+                       // FormatAttribute indexes are 1-based:
+                       Attr->getFormatIdx() - 1, Attr->getFirstArg() - 1)) {
+      Result.addNode(Tag, DynTypedNode::create(*CE));
+      Result.addNode(UnsafeStringTag, DynTypedNode::create(*UnsafeArg));
+      return true;
+    }
+    return false;
+  }
+
+  const Stmt *getBaseStmt() const { return Call; }
+
+  SourceLocation getSourceLoc() const override { return Call->getBeginLoc(); }
+
+  void handleUnsafeOperation(UnsafeBufferUsageHandler &Handler,
+                             bool IsRelatedToDecl,
+                             ASTContext &Ctx) const override {
+    if (UnsafeArg)
+      Handler.handleUnsafeLibcCall(
+          Call, UnsafeLibcFunctionCallGadget::UnsafeKind::STRING, Ctx,
+          UnsafeArg);
+    else
+      Handler.handleUnsafeLibcCall(
+          Call, UnsafeLibcFunctionCallGadget::UnsafeKind::OTHERS, Ctx);
+  }
+
+  DeclUseList getClaimedVarUseSites() const override { return {}; }
+
+  SmallVector<const Expr *, 1> getUnsafePtrs() const override { return {}; }
+};
+
 // Represents expressions of the form `DRE[*]` in the Unspecified Lvalue
 // Context (see `findStmtsInUnspecifiedLvalueContext`).
 // Note here `[]` is the built-in subscript operator.
diff --git a/clang/test/SemaCXX/warn-unsafe-buffer-usage-libc-functions.cpp 
b/clang/test/SemaCXX/warn-unsafe-buffer-usage-libc-functions.cpp
index 4f1af79609223..8df65ebc2eaf0 100644
--- a/clang/test/SemaCXX/warn-unsafe-buffer-usage-libc-functions.cpp
+++ b/clang/test/SemaCXX/warn-unsafe-buffer-usage-libc-functions.cpp
@@ -1,10 +1,10 @@
-// RUN: %clang_cc1 -std=c++20 -Wno-all -Wunsafe-buffer-usage \
+// RUN: %clang_cc1 -std=c++20 -Wno-all -Wunsafe-buffer-usage -Wno-gcc-compat\
 // RUN:            -verify %s
-// RUN: %clang_cc1 -std=c++20 -Wno-all -Wunsafe-buffer-usage \
+// RUN: %clang_cc1 -std=c++20 -Wno-all -Wunsafe-buffer-usage -Wno-gcc-compat\
 // RUN:            -verify %s -x objective-c++
-// RUN: %clang_cc1 -std=c++20 -Wno-all -Wunsafe-buffer-usage-in-libc-call \
+// RUN: %clang_cc1 -std=c++20 -Wno-all -Wunsafe-buffer-usage-in-libc-call 
-Wno-gcc-compat\
 // RUN:            -verify %s
-// RUN: %clang_cc1 -std=c++20 -Wno-all -Wunsafe-buffer-usage-in-libc-call \
+// RUN: %clang_cc1 -std=c++20 -Wno-all -Wunsafe-buffer-usage-in-libc-call 
-Wno-gcc-compat\
 // RUN:            -verify %s -DTEST_STD_NS
 
 typedef struct {} FILE;
@@ -255,3 +255,27 @@ void dontCrashForInvalidFormatString() {
   snprintf((char*)0, 0, "%");
   snprintf((char*)0, 0, "\0");
 }
+
+
+// Also warn about unsafe printf/scanf-like functions:
+void myprintf(const char *F, ...) __attribute__((__format__ (__printf__, 1, 
2)));
+void myprintf_2(const char *F, int irrelevant, const char *Str) 
__attribute__((__format__ (__printf__, 1, 3)));
+void myscanf(const char *F, ...) __attribute__((__format__ (__scanf__, 1, 2)));
+
+void test_myprintf(char * Str, std::string StdStr) {
+  myprintf("hello", Str);
+  myprintf("hello %s", StdStr.c_str());
+  myprintf("hello %s", Str);  // expected-warning{{function 'myprintf' is 
unsafe}} \
+                                expected-note{{string argument is not 
guaranteed to be null-terminated}}
+
+  myprintf_2("hello", 0, Str);
+  myprintf_2("hello %s", 0, StdStr.c_str());
+  myprintf_2("hello %s", 0, Str);  // expected-warning{{function 'myprintf_2' 
is unsafe}} \
+                                     expected-note{{string argument is not 
guaranteed to be null-terminated}}
+  myscanf("hello %s");
+  myscanf("hello %s", Str); // expected-warning{{function 'myscanf' is unsafe}}
+
+  int X;
+
+  myscanf("hello %d", &X); // expected-warning{{function 'myscanf' is unsafe}}
+}

_______________________________________________
cfe-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to