https://github.com/dkrupp updated 
https://github.com/llvm/llvm-project/pull/178054

>From 14c0472a6fc98d07ad374d832f4bf24d6583e80e Mon Sep 17 00:00:00 2001
From: Daniel Krupp <[email protected]>
Date: Mon, 26 Jan 2026 22:17:41 +0100
Subject: [PATCH] [clang][analyzer] Add taintedness to argv

If the execution environment is untrusted, we assume
that the argv of the main function is an attacker-controlled
value and set it as a taint analysis source.
---
 clang/docs/analyzer/checkers.rst              |  9 +++-
 .../Checkers/GenericTaintChecker.cpp          | 40 +++++++++++++-
 .../Analysis/taint-diagnostic-visitor-main.c  | 54 +++++++++++++++++++
 clang/test/Analysis/taint-generic.c           | 13 +++++
 4 files changed, 113 insertions(+), 3 deletions(-)
 create mode 100644 clang/test/Analysis/taint-diagnostic-visitor-main.c

diff --git a/clang/docs/analyzer/checkers.rst b/clang/docs/analyzer/checkers.rst
index 31edf9e99dc7d..a3ac11935afd8 100644
--- a/clang/docs/analyzer/checkers.rst
+++ b/clang/docs/analyzer/checkers.rst
@@ -1371,9 +1371,16 @@ For a more detailed description of configuration 
options, please see the
 
 **Configuration**
 
-* `Config`  Specifies the name of the YAML configuration file. The user can
+* ``Config`` Specifies the name of the YAML configuration file. The user can
   define their own taint sources and sinks.
 
+* If the analyzer option ``assume-controlled-environment`` is set to 
``false``,
+  it is assumed that the command-line arguments and the environment
+  variables of the program are attacker-controlled.
+  In particular, the ``argv`` argument of the ``main`` function and
+  the return value of the ``getenv()`` function are assumed to
+  hold tainted values.
+
 **Related Guidelines**
 
 * `CWE Data Neutralization Issues
diff --git a/clang/lib/StaticAnalyzer/Checkers/GenericTaintChecker.cpp 
b/clang/lib/StaticAnalyzer/Checkers/GenericTaintChecker.cpp
index bea8f3f13ba21..9d88f85c88ad2 100644
--- a/clang/lib/StaticAnalyzer/Checkers/GenericTaintChecker.cpp
+++ b/clang/lib/StaticAnalyzer/Checkers/GenericTaintChecker.cpp
@@ -63,6 +63,8 @@ constexpr llvm::StringLiteral MsgSanitizeSystemArgs =
 constexpr llvm::StringLiteral MsgCustomSink =
     "Untrusted data is passed to a user-defined sink";
 
+const std::string MsgTaintOrigin = "Taint originated here";
+
 using ArgIdxTy = int;
 using ArgVecTy = llvm::SmallVector<ArgIdxTy, 2>;
 
@@ -159,7 +161,7 @@ const NoteTag *taintOriginTrackerTag(CheckerContext &C,
       return "";
     }
     if (TaintedSymbols.empty())
-      return "Taint originated here";
+      return MsgTaintOrigin;
 
     for (auto Sym : TaintedSymbols) {
       BR.markInteresting(Sym);
@@ -378,10 +380,12 @@ struct GenericTaintRuleParser {
   CheckerManager &Mgr;
 };
 
-class GenericTaintChecker : public Checker<check::PreCall, check::PostCall> {
+class GenericTaintChecker
+    : public Checker<check::PreCall, check::PostCall, check::BeginFunction> {
 public:
   void checkPreCall(const CallEvent &Call, CheckerContext &C) const;
   void checkPostCall(const CallEvent &Call, CheckerContext &C) const;
+  void checkBeginFunction(CheckerContext &C) const;
 
   void printState(raw_ostream &Out, ProgramStateRef State, const char *NL,
                   const char *Sep) const override;
@@ -827,8 +831,40 @@ void GenericTaintChecker::initTaintRules(CheckerContext 
&C) const {
                             std::make_move_iterator(Rules.end()));
 }
 
+// Taints the value of the second parameter (argv) of a top-frame main
+// function, unless the analyzer assumes a controlled environment.
+void GenericTaintChecker::checkBeginFunction(CheckerContext &C) const {
+  if (!C.inTopFrame() || C.getAnalysisManager()
+                             .getAnalyzerOptions()
+                             .ShouldAssumeControlledEnvironment)
+    return;
+
+  const auto *FD = dyn_cast<FunctionDecl>(C.getLocationContext()->getDecl());
+  if (!FD || !FD->isMain() || FD->param_size() < 2)
+    return;
+
+  ProgramStateRef State = C.getState();
+  const MemRegion *ArgvReg =
+      State->getRegion(FD->parameters()[1], C.getLocationContext());
+  SVal ArgvSval = State->getSVal(ArgvReg);
+  // Mark the value stored in the argv parameter as tainted.
+  State = addTaint(State, ArgvSval);
+
+  const NoteTag *OriginatingTag =
+      C.getNoteTag([ArgvSval](PathSensitiveBugReport &BR) -> std::string {
+        // Emit the note only for taint reports where argv is interesting.
+        if (!BR.isInteresting(ArgvSval) ||
+            BR.getBugType().getCategory() != categories::TaintedData)
+          return "";
+
+        return MsgTaintOrigin;
+      });
+  C.addTransition(State, OriginatingTag);
+}
+
 void GenericTaintChecker::checkPreCall(const CallEvent &Call,
                                        CheckerContext &C) const {
+
   initTaintRules(C);
 
   // FIXME: this should be much simpler.
diff --git a/clang/test/Analysis/taint-diagnostic-visitor-main.c 
b/clang/test/Analysis/taint-diagnostic-visitor-main.c
new file mode 100644
index 0000000000000..315e352cb549f
--- /dev/null
+++ b/clang/test/Analysis/taint-diagnostic-visitor-main.c
@@ -0,0 +1,54 @@
+// RUN: %clang_analyze_cc1 
-analyzer-checker=optin.taint,core,security.ArrayBound -DUNTRUSTED 
-analyzer-config assume-controlled-environment=false -analyzer-output=text 
-verify=expected,untrusted %s
+// RUN: %clang_analyze_cc1 
-analyzer-checker=optin.taint,core,security.ArrayBound -analyzer-config 
assume-controlled-environment=true -analyzer-output=text 
-verify=expected,trusted %s
+
+// This file is for testing enhanced diagnostics produced by the 
GenericTaintChecker
+
+typedef __typeof(sizeof(int)) size_t;
+struct _IO_FILE;
+typedef struct _IO_FILE FILE;
+
+int scanf(const char *restrict format, ...);
+int system(const char *command);
+char* getenv( const char* env_var );
+size_t strlen( const char* str );
+char *strcat( char *dest, const char *src );
+char * strncat ( char * destination, const char * source, size_t num );
+char* strcpy( char* dest, const char* src );
+char * strncpy ( char * destination, const char * source, size_t num );
+void *malloc(size_t size );
+void free( void *ptr );
+char *fgets(char *str, int n, FILE *stream);
+extern FILE *stdin;
+
+
+#ifdef UNTRUSTED
+// In an untrusted environment the command-line arguments (argv) are
+// assumed to be tainted, so the taint origin is reported on main itself.
+int main(int argc, char * argv[]) {// untrusted-note {{Taint originated here}}
+   if (argc < 1)// untrusted-note {{'argc' is >= 1}}
+                // untrusted-note@-1 {{Taking false branch}}
+     return 1;
+   char cmd[2048] = "/bin/cat ";
+   char filename[1024];
+   strncpy(filename, argv[1], sizeof(filename)-1); // untrusted-note {{Taint 
propagated to the 1st argument}}
+   strncat(cmd, filename, sizeof(cmd) - strlen(cmd)-1);// untrusted-note 
{{Taint propagated to the 1st argument}}
+   system(cmd);// untrusted-warning {{Untrusted data is passed to a system 
call}}
+               // untrusted-note@-1 {{Untrusted data is passed to a system 
call}}
+   return 0;
+ }
+#else
+int main(int argc, char * argv[]) { // argv is trusted here; taint enters via scanf
+   if (argc < 1)// trusted-note {{'argc' is >= 1}}
+                // trusted-note@-1 {{Taking false branch}}
+     return 1;
+   char cmd[2048] = "/bin/cat ";
+   char filename[1024];
+   scanf("%s", filename);// trusted-note {{Taint originated here}}
+                         // trusted-note@-1 {{Taint propagated to the 2nd 
argument}}
+   strncat(filename, argv[1], sizeof(filename) - strlen(argv[1]) - 1);// 
trusted-note {{Taint propagated to the 1st argument}}
+   strncat(cmd, filename, sizeof(cmd) - strlen(cmd)-1);// trusted-note {{Taint 
propagated to the 1st argument}}
+   system(cmd);// trusted-warning {{Untrusted data is passed to a system call}}
+               // trusted-note@-1 {{Untrusted data is passed to a system call}}
+   return 0;
+ }
+ #endif
diff --git a/clang/test/Analysis/taint-generic.c 
b/clang/test/Analysis/taint-generic.c
index 6017483f06b6d..93a5b9b3a53cd 100644
--- a/clang/test/Analysis/taint-generic.c
+++ b/clang/test/Analysis/taint-generic.c
@@ -116,6 +116,7 @@ char *stpcpy(char *restrict s1, const char *restrict s2);
 char *strncpy( char * destination, const char * source, size_t num );
 char *strndup(const char *s, size_t n);
 char *strncat(char *restrict s1, const char *restrict s2, size_t n);
+char *strcat( char *dest, const char *src );
 
 void *malloc(size_t);
 void *calloc(size_t nmemb, size_t size);
@@ -1396,3 +1397,15 @@ void testAcceptPropagates() {
   int acceptSocket = accept(listenSocket, 0, 0);
   clang_analyzer_isTainted_int(acceptSocket); // expected-warning {{YES}}
 }
+
+int main(int argc, char * argv[]) { // argv[1] is expected to be tainted below
+   if (argc < 1)
+     return 1;
+   char cmd[2048] = "/bin/cat ";
+   char filename[1024]; // NOTE(review): never referenced in this function
+   clang_analyzer_isTainted_char(*argv[1]); // expected-warning{{YES}}
+   strncat(cmd, argv[1], sizeof(cmd) - strlen(cmd)-1);
+   system(cmd);// expected-warning {{Untrusted data is passed to a system 
call}}
+   return 0;
+ }
+ 
\ No newline at end of file

_______________________________________________
cfe-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to