vlad.tsyrklevich created this revision.
vlad.tsyrklevich added reviewers: zaks.anna, cfe-commits.

While extending the GenericTaintChecker for my purposes I'm hitting a case 
where taint is not propagated where I believe it should be. Currently, the 
following example will propagate taint correctly:

  char buf[16];
  taint_source(buf);
  taint_sink(buf);

However, the following fails:

  char buf[16];
  taint_source(&buf);
  taint_sink(buf);

In the first example, buf has it's symbol correctly extracted (via 
`GenericTaintChecker::getPointedToSymbol()`) as a  
`SymbolDerived{conj_$N{char}, element{buf,0 S64b,char}}`, it's marked as 
tainted and then the taint check correctly finds it using 
`ProgramState::isTainted()`.

In the second example, the SVal obtained in 
`GenericTaintChecker::getPointedToSymbol()` is a LazyCompoundVal so 
`SVal::getAsSymbol()` returns a NULL SymbolRef, meaning the symbol is not 
tainted.

This change extends GenericTaintChecker to obtain a SymbolRegionValue from the 
LCV MemRegion in `getPointedToSymbol()`, and then extends 
`ProgramState::isTainted()` to correctly match a `SymbolRegionValue{buf}` 
against a `SymbolDerived{conj_$N{char}, element{buf,0 S64b,char}}`.

I'm not familiar enough with analyzer internals to be confident in whether this 
is the right approach to fix this bug, so feedback is welcome.


https://reviews.llvm.org/D28445

Files:
  lib/StaticAnalyzer/Checkers/GenericTaintChecker.cpp
  lib/StaticAnalyzer/Core/ProgramState.cpp
  test/Analysis/taint-generic.c


Index: test/Analysis/taint-generic.c
===================================================================
--- test/Analysis/taint-generic.c
+++ test/Analysis/taint-generic.c
@@ -169,6 +169,11 @@
   sock = socket(AF_LOCAL, SOCK_STREAM, 0);
   read(sock, buffer, 100);
   execl(buffer, "filename", 0); // no-warning
+
+  sock = socket(AF_INET, SOCK_STREAM, 0);
+  // References to both buffer and &buffer as an argument should taint the 
argument
+  read(sock, &buffer, 100);
+  execl(buffer, "filename", 0); // expected-warning {{Untrusted data is passed 
to a system call}}
 }
 
 int testDivByZero() {
Index: lib/StaticAnalyzer/Core/ProgramState.cpp
===================================================================
--- lib/StaticAnalyzer/Core/ProgramState.cpp
+++ lib/StaticAnalyzer/Core/ProgramState.cpp
@@ -690,6 +690,15 @@
   if (!Reg)
     return false;
 
+  if (const TypedValueRegion *TVR = dyn_cast<TypedValueRegion>(Reg)) {
+    SymbolRef Sym = getSymbolManager().getRegionValueSymbol(TVR);
+
+    // We don't call isTainted(Sym, K) here because it could lead to infinite 
recursion.
+    const TaintTagType *Tag = get<TaintMap>(Sym);
+    if (Tag && *Tag == K)
+      return true;
+  }
+
   // Element region (array element) is tainted if either the base or the offset
   // are tainted.
   if (const ElementRegion *ER = dyn_cast<ElementRegion>(Reg))
@@ -720,7 +729,8 @@
 
     // If this is a SymbolDerived with a tainted parent, it's also tainted.
     if (const SymbolDerived *SD = dyn_cast<SymbolDerived>(*SI))
-      Tainted = Tainted || isTainted(SD->getParentSymbol(), Kind);
+      Tainted = Tainted || isTainted(SD->getParentSymbol(), Kind)
+                        || isTainted(SD->getOriginRegion(), Kind);
 
     // If memory region is tainted, data is also tainted.
     if (const SymbolRegionValue *SRV = dyn_cast<SymbolRegionValue>(*SI))
Index: lib/StaticAnalyzer/Checkers/GenericTaintChecker.cpp
===================================================================
--- lib/StaticAnalyzer/Checkers/GenericTaintChecker.cpp
+++ lib/StaticAnalyzer/Checkers/GenericTaintChecker.cpp
@@ -438,6 +438,10 @@
     dyn_cast<PointerType>(Arg->getType().getCanonicalType().getTypePtr());
   SVal Val = State->getSVal(*AddrLoc,
                             ArgTy ? ArgTy->getPointeeType(): QualType());
+
+  if (auto LCV = Val.getAs<nonloc::LazyCompoundVal>())
+    return C.getSymbolManager().getRegionValueSymbol(LCV->getRegion());
+
   return Val.getAsSymbol();
 }
 


Index: test/Analysis/taint-generic.c
===================================================================
--- test/Analysis/taint-generic.c
+++ test/Analysis/taint-generic.c
@@ -169,6 +169,11 @@
   sock = socket(AF_LOCAL, SOCK_STREAM, 0);
   read(sock, buffer, 100);
   execl(buffer, "filename", 0); // no-warning
+
+  sock = socket(AF_INET, SOCK_STREAM, 0);
+  // References to both buffer and &buffer as an argument should taint the argument
+  read(sock, &buffer, 100);
+  execl(buffer, "filename", 0); // expected-warning {{Untrusted data is passed to a system call}}
 }
 
 int testDivByZero() {
Index: lib/StaticAnalyzer/Core/ProgramState.cpp
===================================================================
--- lib/StaticAnalyzer/Core/ProgramState.cpp
+++ lib/StaticAnalyzer/Core/ProgramState.cpp
@@ -690,6 +690,15 @@
   if (!Reg)
     return false;
 
+  if (const TypedValueRegion *TVR = dyn_cast<TypedValueRegion>(Reg)) {
+    SymbolRef Sym = getSymbolManager().getRegionValueSymbol(TVR);
+
+    // We don't call isTainted(Sym, K) here because it could lead to infinite recursion.
+    const TaintTagType *Tag = get<TaintMap>(Sym);
+    if (Tag && *Tag == K)
+      return true;
+  }
+
   // Element region (array element) is tainted if either the base or the offset
   // are tainted.
   if (const ElementRegion *ER = dyn_cast<ElementRegion>(Reg))
@@ -720,7 +729,8 @@
 
     // If this is a SymbolDerived with a tainted parent, it's also tainted.
     if (const SymbolDerived *SD = dyn_cast<SymbolDerived>(*SI))
-      Tainted = Tainted || isTainted(SD->getParentSymbol(), Kind);
+      Tainted = Tainted || isTainted(SD->getParentSymbol(), Kind)
+                        || isTainted(SD->getOriginRegion(), Kind);
 
     // If memory region is tainted, data is also tainted.
     if (const SymbolRegionValue *SRV = dyn_cast<SymbolRegionValue>(*SI))
Index: lib/StaticAnalyzer/Checkers/GenericTaintChecker.cpp
===================================================================
--- lib/StaticAnalyzer/Checkers/GenericTaintChecker.cpp
+++ lib/StaticAnalyzer/Checkers/GenericTaintChecker.cpp
@@ -438,6 +438,10 @@
     dyn_cast<PointerType>(Arg->getType().getCanonicalType().getTypePtr());
   SVal Val = State->getSVal(*AddrLoc,
                             ArgTy ? ArgTy->getPointeeType(): QualType());
+
+  if (auto LCV = Val.getAs<nonloc::LazyCompoundVal>())
+    return C.getSymbolManager().getRegionValueSymbol(LCV->getRegion());
+
   return Val.getAsSymbol();
 }
 
_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to