llvmbot wrote:

<!--LLVM PR SUMMARY COMMENT-->

@llvm/pr-subscribers-clang-static-analyzer-1

Author: Daniel Krupp (dkrupp)

<details>
<summary>Changes</summary>

If the execution environment is untrusted, we assume that the argv of the main 
function is an attacker
controlled value and set it as an taint analysis source.

---
Full diff: https://github.com/llvm/llvm-project/pull/178054.diff


4 Files Affected:

- (modified) clang/docs/analyzer/checkers.rst (+7) 
- (modified) clang/lib/StaticAnalyzer/Checkers/GenericTaintChecker.cpp (+38-2) 
- (added) clang/test/Analysis/taint-diagnostic-visitor-main.c (+54) 
- (modified) clang/test/Analysis/taint-generic.c (+13) 


``````````diff
diff --git a/clang/docs/analyzer/checkers.rst b/clang/docs/analyzer/checkers.rst
index 31edf9e99dc7d..178156c9b2f07 100644
--- a/clang/docs/analyzer/checkers.rst
+++ b/clang/docs/analyzer/checkers.rst
@@ -1374,6 +1374,13 @@ For a more detailed description of configuration 
options, please see the
 * `Config`  Specifies the name of the YAML configuration file. The user can
   define their own taint sources and sinks.
 
+* The if the analyzer option `assume-controlled-environment` is set to `false`,
+  it is assumed that the command line arguments and the environment
+  variables of the program are attacker controlled.
+  In particular, the `argv` argument of the `main` function and
+  the return value of the `getenv()` function are assumed to
+  hold tainted values.
+
 **Related Guidelines**
 
 * `CWE Data Neutralization Issues
diff --git a/clang/lib/StaticAnalyzer/Checkers/GenericTaintChecker.cpp 
b/clang/lib/StaticAnalyzer/Checkers/GenericTaintChecker.cpp
index bea8f3f13ba21..69af9919e34af 100644
--- a/clang/lib/StaticAnalyzer/Checkers/GenericTaintChecker.cpp
+++ b/clang/lib/StaticAnalyzer/Checkers/GenericTaintChecker.cpp
@@ -63,6 +63,8 @@ constexpr llvm::StringLiteral MsgSanitizeSystemArgs =
 constexpr llvm::StringLiteral MsgCustomSink =
     "Untrusted data is passed to a user-defined sink";
 
+const std::string MsgTaintOrigin = "Taint originated here";
+
 using ArgIdxTy = int;
 using ArgVecTy = llvm::SmallVector<ArgIdxTy, 2>;
 
@@ -159,7 +161,7 @@ const NoteTag *taintOriginTrackerTag(CheckerContext &C,
       return "";
     }
     if (TaintedSymbols.empty())
-      return "Taint originated here";
+      return MsgTaintOrigin;
 
     for (auto Sym : TaintedSymbols) {
       BR.markInteresting(Sym);
@@ -378,10 +380,12 @@ struct GenericTaintRuleParser {
   CheckerManager &Mgr;
 };
 
-class GenericTaintChecker : public Checker<check::PreCall, check::PostCall> {
+class GenericTaintChecker
+    : public Checker<check::PreCall, check::PostCall, check::BeginFunction> {
 public:
   void checkPreCall(const CallEvent &Call, CheckerContext &C) const;
   void checkPostCall(const CallEvent &Call, CheckerContext &C) const;
+  void checkBeginFunction(CheckerContext &C) const;
 
   void printState(raw_ostream &Out, ProgramStateRef State, const char *NL,
                   const char *Sep) const override;
@@ -827,8 +831,40 @@ void GenericTaintChecker::initTaintRules(CheckerContext 
&C) const {
                             std::make_move_iterator(Rules.end()));
 }
 
+// The incoming parameters of the main function get tainted
+// if the program called in an untrusted environment.
+void GenericTaintChecker::checkBeginFunction(CheckerContext &C) const {
+  if (!C.inTopFrame() || C.getAnalysisManager()
+                             .getAnalyzerOptions()
+                             .ShouldAssumeControlledEnvironment)
+    return;
+
+  const auto *FD = dyn_cast<FunctionDecl>(C.getLocationContext()->getDecl());
+  if (!FD || FD->param_size() < 2 || !FD->isMain())
+    return;
+
+  ProgramStateRef State = C.getState();
+  const MemRegion *ArgvReg =
+      State->getRegion(FD->parameters()[1], C.getLocationContext());
+  SVal ArgvSval = State->getSVal(ArgvReg);
+  // Add taintedness to argv**
+  State = addTaint(State, ArgvSval);
+
+  const NoteTag *OriginatingTag =
+      C.getNoteTag([ArgvSval](PathSensitiveBugReport &BR) -> std::string {
+        // We give diagnostics only for taint related reports
+        if (!BR.isInteresting(ArgvSval) ||
+            BR.getBugType().getCategory() != categories::TaintedData)
+          return "";
+
+        return MsgTaintOrigin;
+      });
+  C.addTransition(State, OriginatingTag);
+}
+
 void GenericTaintChecker::checkPreCall(const CallEvent &Call,
                                        CheckerContext &C) const {
+
   initTaintRules(C);
 
   // FIXME: this should be much simpler.
diff --git a/clang/test/Analysis/taint-diagnostic-visitor-main.c 
b/clang/test/Analysis/taint-diagnostic-visitor-main.c
new file mode 100644
index 0000000000000..315e352cb549f
--- /dev/null
+++ b/clang/test/Analysis/taint-diagnostic-visitor-main.c
@@ -0,0 +1,54 @@
+// RUN: %clang_analyze_cc1 
-analyzer-checker=optin.taint,core,security.ArrayBound -DUNTRUSTED 
-analyzer-config assume-controlled-environment=false -analyzer-output=text 
-verify=expected,untrusted %s
+// RUN: %clang_analyze_cc1 
-analyzer-checker=optin.taint,core,security.ArrayBound -analyzer-config 
assume-controlled-environment=true -analyzer-output=text 
-verify=expected,trusted %s
+
+// This file is for testing enhanced diagnostics produced by the 
GenericTaintChecker
+
+typedef __typeof(sizeof(int)) size_t;
+struct _IO_FILE;
+typedef struct _IO_FILE FILE;
+
+int scanf(const char *restrict format, ...);
+int system(const char *command);
+char* getenv( const char* env_var );
+size_t strlen( const char* str );
+char *strcat( char *dest, const char *src );
+char * strncat ( char * destination, const char * source, size_t num );
+char* strcpy( char* dest, const char* src );
+char * strncpy ( char * destination, const char * source, size_t num );
+void *malloc(size_t size );
+void free( void *ptr );
+char *fgets(char *str, int n, FILE *stream);
+extern FILE *stdin;
+
+
+#ifdef UNTRUSTED
+// In an untrusted environment the cmd line arguments
+// are assumed to be tainted.
+int main(int argc, char * argv[]) {// untrusted-note {{Taint originated here}}
+   if (argc < 1)// untrusted-note {{'argc' is >= 1}}
+                // untrusted-note@-1 {{Taking false branch}}
+     return 1;
+   char cmd[2048] = "/bin/cat ";
+   char filename[1024];
+   strncpy(filename, argv[1], sizeof(filename)-1); // untrusted-note {{Taint 
propagated to the 1st argument}}
+   strncat(cmd, filename, sizeof(cmd) - strlen(cmd)-1);// untrusted-note 
{{Taint propagated to the 1st argument}}
+   system(cmd);// untrusted-warning {{Untrusted data is passed to a system 
call}}
+               // untrusted-note@-1 {{Untrusted data is passed to a system 
call}}
+   return 0;
+ }
+#else
+int main(int argc, char * argv[]) {
+   if (argc < 1)// trusted-note {{'argc' is >= 1}}
+                // trusted-note@-1 {{Taking false branch}}
+     return 1;
+   char cmd[2048] = "/bin/cat ";
+   char filename[1024];
+   scanf("%s", filename);// trusted-note {{Taint originated here}}
+                         // trusted-note@-1 {{Taint propagated to the 2nd 
argument}}
+   strncat(filename, argv[1], sizeof(filename)- - strlen(argv[1]) - 1);// 
trusted-note {{Taint propagated to the 1st argument}}
+   strncat(cmd, filename, sizeof(cmd) - strlen(cmd)-1);// trusted-note {{Taint 
propagated to the 1st argument}}
+   system(cmd);// trusted-warning {{Untrusted data is passed to a system call}}
+               // trusted-note@-1 {{Untrusted data is passed to a system call}}
+   return 0;
+ }
+ #endif
diff --git a/clang/test/Analysis/taint-generic.c 
b/clang/test/Analysis/taint-generic.c
index 6017483f06b6d..93a5b9b3a53cd 100644
--- a/clang/test/Analysis/taint-generic.c
+++ b/clang/test/Analysis/taint-generic.c
@@ -116,6 +116,7 @@ char *stpcpy(char *restrict s1, const char *restrict s2);
 char *strncpy( char * destination, const char * source, size_t num );
 char *strndup(const char *s, size_t n);
 char *strncat(char *restrict s1, const char *restrict s2, size_t n);
+char *strcat( char *dest, const char *src );
 
 void *malloc(size_t);
 void *calloc(size_t nmemb, size_t size);
@@ -1396,3 +1397,15 @@ void testAcceptPropagates() {
   int acceptSocket = accept(listenSocket, 0, 0);
   clang_analyzer_isTainted_int(acceptSocket); // expected-warning {{YES}}
 }
+
+int main(int argc, char * argv[]) {
+   if (argc < 1)
+     return 1;
+   char cmd[2048] = "/bin/cat ";
+   char filename[1024];
+   clang_analyzer_isTainted_char(*argv[1]); // expected-warning{{YES}}
+   strncat(cmd, argv[1], sizeof(cmd) - strlen(cmd)-1);
+   system(cmd);// expected-warning {{Untrusted data is passed to a system 
call}}
+   return 0;
+ }
+ 
\ No newline at end of file

``````````

</details>


https://github.com/llvm/llvm-project/pull/178054
_______________________________________________
cfe-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to