https://github.com/bozicrHT created 
https://github.com/llvm/llvm-project/pull/158626

Annotate printf/scanf and related builtins with the nonnull attribute on their 
format string parameters. This enables diagnostics when NULL is passed, 
matching GCC behavior. Updated the existing Sema tests and added a new one for 
coverage.

From 660c921373932516e9dad280229accbc5df78c8b Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Radovan=20Bo=C5=BEi=C4=87?= <[email protected]>
Date: Mon, 15 Sep 2025 13:13:10 +0200
Subject: [PATCH] [clang][sema] Add nonnull attribute to builtin format
 functions

Annotate printf/scanf and related builtins with the nonnull attribute on
their format string parameters. This enables diagnostics when NULL is
passed, matching GCC behavior. Updated the existing Sema tests and added a
new one for coverage.
---
 clang/include/clang/Basic/Builtins.def    |  1 +
 clang/include/clang/Basic/Builtins.h      |  4 ++
 clang/include/clang/Basic/Builtins.td     | 31 +++++-----
 clang/include/clang/Basic/BuiltinsBase.td |  2 +-
 clang/lib/Basic/Builtins.cpp              | 28 +++++++++
 clang/lib/Sema/SemaDecl.cpp               |  9 +++
 clang/test/Sema/format-strings-nonnull.c  | 74 +++++++++++++++++++++++
 clang/test/Sema/format-strings.c          |  6 +-
 clang/test/SemaCXX/format-strings-0x.cpp  |  2 +
 clang/test/SemaObjC/format-strings-objc.m |  3 +-
 10 files changed, 139 insertions(+), 21 deletions(-)
 create mode 100644 clang/test/Sema/format-strings-nonnull.c

diff --git a/clang/include/clang/Basic/Builtins.def 
b/clang/include/clang/Basic/Builtins.def
index 48437c9397570..dfd54c974c322 100644
--- a/clang/include/clang/Basic/Builtins.def
+++ b/clang/include/clang/Basic/Builtins.def
@@ -67,6 +67,7 @@
 // The third value provided to the macro specifies information about attributes
 // of the function.  These must be kept in sync with the predicates in the
 // Builtin::Context class.  Currently we have:
+//  N -> nonnull
 //  n -> nothrow
 //  r -> noreturn
 //  U -> pure
diff --git a/clang/include/clang/Basic/Builtins.h 
b/clang/include/clang/Basic/Builtins.h
index 3a5e31de2bc50..68d7043cac1bf 100644
--- a/clang/include/clang/Basic/Builtins.h
+++ b/clang/include/clang/Basic/Builtins.h
@@ -392,6 +392,10 @@ class Context {
   bool performsCallback(unsigned ID,
                         llvm::SmallVectorImpl<int> &Encoding) const;
 
+  /// Return true if this builtin has parameters at fixed positions
+  /// that must be non-null.
+  bool IsNonNull(unsigned ID, llvm::SmallVectorImpl<int> &Indxs) const;
+
   /// Return true if this function has no side effects and doesn't
   /// read memory, except for possibly errno or raising FP exceptions.
   ///
diff --git a/clang/include/clang/Basic/Builtins.td 
b/clang/include/clang/Basic/Builtins.td
index 27639f06529cb..373715b07f732 100644
--- a/clang/include/clang/Basic/Builtins.td
+++ b/clang/include/clang/Basic/Builtins.td
@@ -3083,104 +3083,105 @@ def StrLen : LibBuiltin<"string.h"> {
 // FIXME: This list is incomplete.
 def Printf : LibBuiltin<"stdio.h"> {
   let Spellings = ["printf"];
-  let Attributes = [PrintfFormat<0>];
+  let Attributes = [PrintfFormat<0>, NonNull<[0]>];
   let Prototype = "int(char const*, ...)";
 }
 
 // FIXME: The builtin and library function should have the same signature.
 def BuiltinPrintf : Builtin {
   let Spellings = ["__builtin_printf"];
-  let Attributes = [NoThrow, PrintfFormat<0>, FunctionWithBuiltinPrefix];
+  let Attributes = [NoThrow, PrintfFormat<0>, FunctionWithBuiltinPrefix,
+                    NonNull<[0]>];
   let Prototype = "int(char const* restrict, ...)";
 }
 
 def FPrintf : LibBuiltin<"stdio.h"> {
   let Spellings = ["fprintf"];
-  let Attributes = [NoThrow, PrintfFormat<1>];
+  let Attributes = [NoThrow, PrintfFormat<1>, NonNull<[1]>];
   let Prototype = "int(FILE* restrict, char const* restrict, ...)";
   let AddBuiltinPrefixedAlias = 1;
 }
 
 def SnPrintf : LibBuiltin<"stdio.h"> {
   let Spellings = ["snprintf"];
-  let Attributes = [NoThrow, PrintfFormat<2>];
+  let Attributes = [NoThrow, PrintfFormat<2>, NonNull<[2]>];
   let Prototype = "int(char* restrict, size_t, char const* restrict, ...)";
   let AddBuiltinPrefixedAlias = 1;
 }
 
 def SPrintf : LibBuiltin<"stdio.h"> {
   let Spellings = ["sprintf"];
-  let Attributes = [NoThrow, PrintfFormat<1>];
+  let Attributes = [NoThrow, PrintfFormat<1>, NonNull<[1]>];
   let Prototype = "int(char* restrict, char const* restrict, ...)";
   let AddBuiltinPrefixedAlias = 1;
 }
 
 def VPrintf : LibBuiltin<"stdio.h"> {
   let Spellings = ["vprintf"];
-  let Attributes = [NoThrow, VPrintfFormat<0>];
+  let Attributes = [NoThrow, VPrintfFormat<0>, NonNull<[0]>];
   let Prototype = "int(char const* restrict, __builtin_va_list)";
   let AddBuiltinPrefixedAlias = 1;
 }
 
 def VfPrintf : LibBuiltin<"stdio.h"> {
   let Spellings = ["vfprintf"];
-  let Attributes = [NoThrow, VPrintfFormat<1>];
+  let Attributes = [NoThrow, VPrintfFormat<1>, NonNull<[1]>];
   let Prototype = "int(FILE* restrict, char const* restrict, 
__builtin_va_list)";
   let AddBuiltinPrefixedAlias = 1;
 }
 
 def VsnPrintf : LibBuiltin<"stdio.h"> {
   let Spellings = ["vsnprintf"];
-  let Attributes = [NoThrow, VPrintfFormat<2>];
+  let Attributes = [NoThrow, VPrintfFormat<2>, NonNull<[2]>];
   let Prototype = "int(char* restrict, size_t, char const* restrict, 
__builtin_va_list)";
   let AddBuiltinPrefixedAlias = 1;
 }
 
 def VsPrintf : LibBuiltin<"stdio.h"> {
   let Spellings = ["vsprintf"];
-  let Attributes = [NoThrow, VPrintfFormat<1>];
+  let Attributes = [NoThrow, VPrintfFormat<1>, NonNull<[1]>];
   let Prototype = "int(char* restrict, char const* restrict, 
__builtin_va_list)";
   let AddBuiltinPrefixedAlias = 1;
 }
 
 def Scanf : LibBuiltin<"stdio.h"> {
   let Spellings = ["scanf"];
-  let Attributes = [ScanfFormat<0>];
+  let Attributes = [ScanfFormat<0>, NonNull<[0]>];
   let Prototype = "int(char const* restrict, ...)";
   let AddBuiltinPrefixedAlias = 1;
 }
 
 def FScanf : LibBuiltin<"stdio.h"> {
   let Spellings = ["fscanf"];
-  let Attributes = [ScanfFormat<1>];
+  let Attributes = [ScanfFormat<1>, NonNull<[1]>];
   let Prototype = "int(FILE* restrict, char const* restrict, ...)";
   let AddBuiltinPrefixedAlias = 1;
 }
 
 def SScanf : LibBuiltin<"stdio.h"> {
   let Spellings = ["sscanf"];
-  let Attributes = [ScanfFormat<1>];
+  let Attributes = [ScanfFormat<1>, NonNull<[1]>];
   let Prototype = "int(char const* restrict, char const* restrict, ...)";
   let AddBuiltinPrefixedAlias = 1;
 }
 
 def VScanf : LibBuiltin<"stdio.h"> {
   let Spellings = ["vscanf"];
-  let Attributes = [VScanfFormat<0>];
+  let Attributes = [VScanfFormat<0>, NonNull<[0]>];
   let Prototype = "int(char const* restrict, __builtin_va_list)";
   let AddBuiltinPrefixedAlias = 1;
 }
 
 def VFScanf : LibBuiltin<"stdio.h"> {
   let Spellings = ["vfscanf"];
-  let Attributes = [VScanfFormat<1>];
+  let Attributes = [VScanfFormat<1>, NonNull<[1]>];
   let Prototype = "int(FILE* restrict, char const* restrict, 
__builtin_va_list)";
   let AddBuiltinPrefixedAlias = 1;
 }
 
 def VSScanf : LibBuiltin<"stdio.h"> {
   let Spellings = ["vsscanf"];
-  let Attributes = [VScanfFormat<1>];
+  let Attributes = [VScanfFormat<1>, NonNull<[1]>];
   let Prototype = "int(char const* restrict, char const* restrict, 
__builtin_va_list)";
   let AddBuiltinPrefixedAlias = 1;
 }
diff --git a/clang/include/clang/Basic/BuiltinsBase.td 
b/clang/include/clang/Basic/BuiltinsBase.td
index 09bc9f89059fe..73918ab167b8d 100644
--- a/clang/include/clang/Basic/BuiltinsBase.td
+++ b/clang/include/clang/Basic/BuiltinsBase.td
@@ -32,7 +32,6 @@ def Const : Attribute<"c">;
 def NoThrow : Attribute<"n">;
 def Pure : Attribute<"U">;
 def ReturnsTwice : Attribute<"j">;
-//  FIXME: gcc has nonnull
 
 // builtin-specific attributes
 // ---------------------------
@@ -85,6 +84,7 @@ def Consteval : Attribute<"EG">;
 // Callback behavior: the first index argument is called with the arguments
 // indicated by the remaining indices.
 class Callback<list<int> ArgIndices> : MultiIndexAttribute<"C", ArgIndices>;
+class NonNull<list<int> ArgIndices> : MultiIndexAttribute<"N", ArgIndices>;
 
 // Prefixes
 // ========
diff --git a/clang/lib/Basic/Builtins.cpp b/clang/lib/Basic/Builtins.cpp
index acd98fe84adf5..ae94a5b740540 100644
--- a/clang/lib/Basic/Builtins.cpp
+++ b/clang/lib/Basic/Builtins.cpp
@@ -293,6 +293,34 @@ bool Builtin::Context::isScanfLike(unsigned ID, unsigned 
&FormatIdx,
   return isLike(ID, FormatIdx, HasVAListArg, "sS");
 }
 
+bool Builtin::Context::IsNonNull(unsigned ID,
+                                 llvm::SmallVectorImpl<int> &Indxs) const {
+  // Decode the "N<i,j,...>" encoding, appending each index to Indxs.
+  const char *AttrPos = ::strchr(getAttributesString(ID), 'N');
+  if (!AttrPos)
+    return false;
+
+  ++AttrPos;
+  assert(*AttrPos == '<' &&
+         "NonNull specifier must be followed by a '<'");
+  ++AttrPos;
+
+  char *EndPos;
+  int FirstIdx = ::strtol(AttrPos, &EndPos, 10);
+  assert(FirstIdx >= 0 && "NonNull parameter index must be non-negative!");
+  Indxs.push_back(FirstIdx);
+
+  while (*EndPos == ',') {
+    const char *NextPos = EndPos + 1;
+    int NextIdx = ::strtol(NextPos, &EndPos, 10);
+    assert(NextIdx >= 0 && "NonNull parameter index must be non-negative!");
+    Indxs.push_back(NextIdx);
+  }
+
+  assert(*EndPos == '>' && "NonNull specifier must end with a '>'");
+  return true;
+}
+
 bool Builtin::Context::performsCallback(unsigned ID,
                                         SmallVectorImpl<int> &Encoding) const {
   const char *CalleePos = ::strchr(getAttributesString(ID), 'C');
diff --git a/clang/lib/Sema/SemaDecl.cpp b/clang/lib/Sema/SemaDecl.cpp
index 365ebb63b1559..7c40f665ea7aa 100644
--- a/clang/lib/Sema/SemaDecl.cpp
+++ b/clang/lib/Sema/SemaDecl.cpp
@@ -17097,6 +17097,15 @@ void Sema::AddKnownFunctionAttributes(FunctionDecl 
*FD) {
       }
     }
 
+    SmallVector<int, 4> Indxs;
+    if (Context.BuiltinInfo.IsNonNull(BuiltinID, Indxs) &&
+        !FD->hasAttr<NonNullAttr>()) {
+      llvm::SmallVector<ParamIdx, 4> ParamIndxs;
+      for (int I : Indxs)
+        ParamIndxs.push_back(ParamIdx(I + 1, FD));
+      FD->addAttr(NonNullAttr::CreateImplicit(Context, ParamIndxs.data(),
+                                              ParamIndxs.size()));
+    }
     if (Context.BuiltinInfo.isReturnsTwice(BuiltinID) &&
         !FD->hasAttr<ReturnsTwiceAttr>())
       FD->addAttr(ReturnsTwiceAttr::CreateImplicit(Context,
diff --git a/clang/test/Sema/format-strings-nonnull.c 
b/clang/test/Sema/format-strings-nonnull.c
new file mode 100644
index 0000000000000..6e00dcb55cb3d
--- /dev/null
+++ b/clang/test/Sema/format-strings-nonnull.c
@@ -0,0 +1,74 @@
+// RUN: %clang_cc1 -fsyntax-only -verify -Wnonnull -Wno-format-security %s
+
+#include <stdarg.h>
+#include <stddef.h>
+
+typedef struct _FILE FILE;
+
+int printf(char const* restrict, ...);
+int __builtin_printf(char const* restrict, ...);
+int fprintf(FILE* restrict, char const* restrict, ...);
+int snprintf(char* restrict, size_t, char const* restrict, ...);
+int sprintf(char* restrict, char const* restrict, ...);
+int vprintf(char const* restrict, __builtin_va_list);
+int vfprintf(FILE* restrict, char const* restrict, __builtin_va_list);
+int vsnprintf(char* restrict, size_t, char const* restrict, __builtin_va_list);
+int vsprintf(char* restrict, char const* restrict, __builtin_va_list);
+
+int scanf(char const* restrict, ...);
+int fscanf(FILE* restrict, char const* restrict, ...);
+int sscanf(char const* restrict, char const* restrict, ...);
+int vscanf(char const* restrict, __builtin_va_list);
+int vfscanf(FILE* restrict, char const* restrict, __builtin_va_list);
+int vsscanf(char const* restrict, char const* restrict, __builtin_va_list);
+
+
+void check_format_string(FILE *fp, va_list ap) {
+    char buf[256];
+    char* const fmt = NULL;
+
+    printf(fmt);
+    // expected-warning@-1{{null passed to a callee that requires a non-null 
argument}}
+
+    __builtin_printf(NULL, "xxd");
+    // expected-warning@-1{{null passed to a callee that requires a non-null 
argument}}
+
+    fprintf(fp, NULL, 25);
+    // expected-warning@-1{{null passed to a callee that requires a non-null 
argument}}
+
+    sprintf(buf, NULL, 42);
+    // expected-warning@-1{{null passed to a callee that requires a non-null 
argument}}
+
+    snprintf(buf, 10, 0, 42);
+    // expected-warning@-1{{null passed to a callee that requires a non-null 
argument}}
+
+    vprintf(fmt, ap);
+    // expected-warning@-1{{null passed to a callee that requires a non-null 
argument}}
+
+    vfprintf(fp, 0, ap);
+    // expected-warning@-1{{null passed to a callee that requires a non-null 
argument}}
+
+    vsprintf(buf, NULL, ap);
+    // expected-warning@-1{{null passed to a callee that requires a non-null 
argument}}
+
+    vsnprintf(buf, 10, fmt, ap);
+    // expected-warning@-1{{null passed to a callee that requires a non-null 
argument}}
+
+    scanf(NULL);
+    // expected-warning@-1{{null passed to a callee that requires a non-null 
argument}}
+
+    fscanf(fp, NULL);
+    // expected-warning@-1{{null passed to a callee that requires a non-null 
argument}}
+
+    sscanf(buf, fmt);
+    // expected-warning@-1{{null passed to a callee that requires a non-null 
argument}}
+
+    vscanf(NULL, ap);
+    // expected-warning@-1{{null passed to a callee that requires a non-null 
argument}}
+
+    vfscanf(fp, fmt, ap);
+    // expected-warning@-1{{null passed to a callee that requires a non-null 
argument}}
+
+    vsscanf(buf, NULL, ap);
+    // expected-warning@-1{{null passed to a callee that requires a non-null 
argument}}
+}
\ No newline at end of file
diff --git a/clang/test/Sema/format-strings.c b/clang/test/Sema/format-strings.c
index af30ad5d15fe2..431f12d50a0f4 100644
--- a/clang/test/Sema/format-strings.c
+++ b/clang/test/Sema/format-strings.c
@@ -480,11 +480,9 @@ void pr7981(wint_t c, wchar_t c2) {
 #endif
 }
 
-// -Wformat-security says NULL is not a string literal
 void rdar8269537(void) {
-  // This is likely to crash in most cases, but -Wformat-nonliteral technically
-  // doesn't warn in this case.
-  printf(0); // no-warning
+  printf(0);
+  // expected-warning@-1{{null passed to a callee that requires a non-null 
argument}}
 }
 
 // Handle functions with multiple format attributes.
diff --git a/clang/test/SemaCXX/format-strings-0x.cpp 
b/clang/test/SemaCXX/format-strings-0x.cpp
index 7d37f8276f29f..e0ca7a270c993 100644
--- a/clang/test/SemaCXX/format-strings-0x.cpp
+++ b/clang/test/SemaCXX/format-strings-0x.cpp
@@ -14,6 +14,7 @@ void f(char **sp, float *fp) {
   printf("%a", 1.0);
   scanf("%afoobar", fp);
   printf(nullptr);
+  // expected-warning@-1{{null passed to a callee that requires a non-null 
argument}}
   printf(*sp); // expected-warning {{not a string literal}}
   // expected-note@-1{{treat the string as an argument to avoid this}}
 
@@ -32,4 +33,5 @@ void f(char **sp, float *fp) {
   printf("init list: %d", { 0 }); // expected-error {{cannot pass initializer 
list to variadic function; expected type from format string was 'int'}}
   printf("void: %d", f(sp, fp)); // expected-error {{cannot pass expression of 
type 'void' to variadic function; expected type from format string was 'int'}}
   printf(0, { 0 }); // expected-error {{cannot pass initializer list to 
variadic function}}
+  // expected-warning@-1{{null passed to a callee that requires a non-null 
argument}}
 }
diff --git a/clang/test/SemaObjC/format-strings-objc.m 
b/clang/test/SemaObjC/format-strings-objc.m
index 40c1d31b1fd4c..babbb40394267 100644
--- a/clang/test/SemaObjC/format-strings-objc.m
+++ b/clang/test/SemaObjC/format-strings-objc.m
@@ -130,7 +130,7 @@ void rdar10743758(id x) {
   printf(s2); // expected-warning {{more '%' conversions than data arguments}}
 
   const char * const s3 = (const char *)0;
-  printf(s3); // no-warning (NULL is a valid format string)
+  printf(s3); // expected-warning {{null passed to a callee that requires a 
non-null argument}}
 
   NSString * const ns1 = @"constant string %s"; // expected-note {{format 
string is defined here}}
   NSLog(ns1); // expected-warning {{more '%' conversions than data arguments}}
@@ -259,6 +259,7 @@ void testByValueObjectInFormat(Foo *obj) {
   printf("%d %d %d", 1L, *obj, 1L); // expected-error {{cannot pass object 
with interface type 'Foo' by value to variadic function; expected type from 
format string was 'int'}} expected-warning 2 {{format specifies type 'int' but 
the argument has type 'long'}}
   printf("%!", *obj); // expected-error {{cannot pass object with interface 
type 'Foo' by value through variadic function}} expected-warning {{invalid 
conversion specifier}}
   printf(0, *obj); // expected-error {{cannot pass object with interface type 
'Foo' by value through variadic function}}
+  // expected-warning@-1{{null passed to a callee that requires a non-null 
argument}}
 
   [Bar log2:@"%d", *obj]; // expected-error {{cannot pass object with 
interface type 'Foo' by value to variadic method; expected type from format 
string was 'int'}}
 }

_______________________________________________
cfe-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to