dkrupp updated this revision to Diff 511078.
dkrupp marked 21 inline comments as done.
dkrupp added a comment.
@steakhal thanks for your review. I tried to address all your concerns.
I added an extra test case too (multipleTaintSources(..)) which highlights the
limitation of the current patch: If multiple tainted "variables" reach a sink,
we only generate diagnostics for one of them. The main reason is that the
isTainted() function returns a single tainted Symbolref instead of a vector of
Symbolrefs if there are multiple instances.
I highlighted this in the test and the implementation.
I think this could be still an acceptable limitation for now, because as the
user sanitizes one of the tainted variables, he will get a new diagnostics for
the remaining one(s).
So I suggest to address this limitation in follow-up patche(s).
The implementation as is already greatly improves the understandability of the
reports.
CHANGES SINCE LAST ACTION
https://reviews.llvm.org/D144269/new/
https://reviews.llvm.org/D144269
Files:
clang/include/clang/StaticAnalyzer/Checkers/Taint.h
clang/include/clang/StaticAnalyzer/Core/BugReporter/CommonBugCategories.h
clang/lib/StaticAnalyzer/Checkers/ArrayBoundCheckerV2.cpp
clang/lib/StaticAnalyzer/Checkers/DivZeroChecker.cpp
clang/lib/StaticAnalyzer/Checkers/GenericTaintChecker.cpp
clang/lib/StaticAnalyzer/Checkers/Taint.cpp
clang/lib/StaticAnalyzer/Checkers/VLASizeChecker.cpp
clang/lib/StaticAnalyzer/Core/CommonBugCategories.cpp
clang/test/Analysis/taint-diagnostic-visitor.c
clang/test/Analysis/taint-tester.c
Index: clang/test/Analysis/taint-tester.c
===================================================================
--- clang/test/Analysis/taint-tester.c
+++ clang/test/Analysis/taint-tester.c
@@ -122,7 +122,7 @@
fscanf(pp, "%d", &ii);
int jj = ii;// expected-warning + {{tainted}}
- fscanf(p, "%d", &ii);
+ fscanf(p, "%d", &ii);// expected-warning + {{tainted}}
int jj2 = ii;// expected-warning + {{tainted}}
ii = 3;
Index: clang/test/Analysis/taint-diagnostic-visitor.c
===================================================================
--- clang/test/Analysis/taint-diagnostic-visitor.c
+++ clang/test/Analysis/taint-diagnostic-visitor.c
@@ -2,13 +2,24 @@
// This file is for testing enhanced diagnostics produced by the GenericTaintChecker
+typedef unsigned long size_t;
+struct _IO_FILE;
+typedef struct _IO_FILE FILE;
+
int scanf(const char *restrict format, ...);
int system(const char *command);
+char* getenv( const char* env_var );
+size_t strlen( const char* str );
+void *malloc(size_t size );
+void free( void *ptr );
+char *fgets(char *str, int n, FILE *stream);
+FILE *stdin;
void taintDiagnostic(void)
{
char buf[128];
scanf("%s", buf); // expected-note {{Taint originated here}}
+ // expected-note@-1 {{Taint propagated to argument 2}}
system(buf); // expected-warning {{Untrusted data is passed to a system call}} // expected-note {{Untrusted data is passed to a system call (CERT/STR02-C. Sanitize data passed to complex subsystems)}}
}
@@ -16,6 +27,7 @@
int index;
int Array[] = {1, 2, 3, 4, 5};
scanf("%d", &index); // expected-note {{Taint originated here}}
+ // expected-note@-1 {{Taint propagated to argument 2}}
return Array[index]; // expected-warning {{Out of bound memory access (index is tainted)}}
// expected-note@-1 {{Out of bound memory access (index is tainted)}}
}
@@ -23,6 +35,7 @@
int taintDiagnosticDivZero(int operand) {
scanf("%d", &operand); // expected-note {{Value assigned to 'operand'}}
// expected-note@-1 {{Taint originated here}}
+ // expected-note@-2 {{Taint propagated to argument 2}}
return 10 / operand; // expected-warning {{Division by a tainted value, possibly zero}}
// expected-note@-1 {{Division by a tainted value, possibly zero}}
}
@@ -31,6 +44,71 @@
int x;
scanf("%d", &x); // expected-note {{Value assigned to 'x'}}
// expected-note@-1 {{Taint originated here}}
+ // expected-note@-2 {{Taint propagated to argument 2}}
int vla[x]; // expected-warning {{Declared variable-length array (VLA) has tainted size}}
// expected-note@-1 {{Declared variable-length array (VLA) has tainted size}}
}
+
+
+//Tests if the originated note is correctly placed even if the path is
+//propagating through variables and expressions
+char* taintDiagnosticPropagation(){
+ char *pathbuf;
+ char *pathlist=getenv("PATH"); // expected-note {{Taint originated here}}
+ // expected-note@-1 {{Taint propagated to the return value}}
+ if (pathlist){ // expected-note {{Assuming 'pathlist' is non-null}}
+ // expected-note@-1 {{Taking true branch}}
+ pathbuf=(char*) malloc(strlen(pathlist)+1); // expected-warning{{Untrusted data is used to specify the buffer size}}
+ // expected-note@-1{{Untrusted data is used to specify the buffer size}}
+ // expected-note@-2 {{Taint propagated to the return value}}
+ return pathbuf;
+ }
+ return 0;
+}
+
+//Taint origin should be marked correctly even if there are multiple taint
+//sources in the function
+char* taintDiagnosticPropagation2(){
+ char *pathbuf;
+ char *user_env2=getenv("USER_ENV_VAR2");//unrelated taint source
+ char *pathlist=getenv("PATH"); // expected-note {{Taint originated here}}
+ // expected-note@-1 {{Taint propagated to the return value}}
+ char *user_env=getenv("USER_ENV_VAR");//unrelated taint source
+ if (pathlist){ // expected-note {{Assuming 'pathlist' is non-null}}
+ // expected-note@-1 {{Taking true branch}}
+ pathbuf=(char*) malloc(strlen(pathlist)+1); // expected-warning{{Untrusted data is used to specify the buffer size}}
+ // expected-note@-1{{Untrusted data is used to specify the buffer size}}
+ // expected-note@-2 {{Taint propagated to the return value}}
+ return pathbuf;
+ }
+ return 0;
+}
+
+void testReadStdIn(){
+ char buf[1024];
+ fgets(buf, sizeof(buf), stdin);// expected-note {{Taint originated here}}
+ // expected-note@-1 {{Taint propagated to argument 1}}
+ system(buf);// expected-warning {{Untrusted data is passed to a system call}} // expected-note {{Untrusted data is passed to a system call (CERT/STR02-C. Sanitize data passed to complex subsystems)}}
+
+}
+
+void multipleTaintSources(void) {
+ int x,y,z;
+ scanf("%d", &x); // expected-note {{Taint originated here}}
+ // expected-note@-1 {{Taint propagated to argument 2}}
+ scanf("%d", &y); // FIXME: Would be nice to indicate taint originated and propagation here too
+ scanf("%d", &z);
+ int* ptr = (int*) malloc(y + x); // expected-warning {{Untrusted data is used to specify the buffer size}}
+ // expected-note@-1{{Untrusted data is used to specify the buffer size}}
+ free (ptr);
+}
+
+void multipleTaintedArgs(void) {
+ int x,y;
+ scanf("%d %d", &x, &y); // expected-note {{Taint originated here}}
+ // expected-note@-1 {{Taint propagated to argument 3}}
+ // FIXME: Would be nice to indicate taint propagation argument 2
+ int* ptr = (int*) malloc(x + y); // expected-warning {{Untrusted data is used to specify the buffer size}}
+ // expected-note@-1{{Untrusted data is used to specify the buffer size}}
+ free (ptr);
+}
Index: clang/lib/StaticAnalyzer/Core/CommonBugCategories.cpp
===================================================================
--- clang/lib/StaticAnalyzer/Core/CommonBugCategories.cpp
+++ clang/lib/StaticAnalyzer/Core/CommonBugCategories.cpp
@@ -23,6 +23,7 @@
const char *const CXXMoveSemantics = "C++ move semantics";
const char *const SecurityError = "Security error";
const char *const UnusedCode = "Unused code";
+const char *const TaintedData = "Tainted data used";
} // namespace categories
} // namespace ento
} // namespace clang
Index: clang/lib/StaticAnalyzer/Checkers/VLASizeChecker.cpp
===================================================================
--- clang/lib/StaticAnalyzer/Checkers/VLASizeChecker.cpp
+++ clang/lib/StaticAnalyzer/Checkers/VLASizeChecker.cpp
@@ -55,8 +55,7 @@
const Expr *SizeE) const;
void reportBug(VLASize_Kind Kind, const Expr *SizeE, ProgramStateRef State,
- CheckerContext &C,
- std::unique_ptr<BugReporterVisitor> Visitor = nullptr) const;
+ CheckerContext &C, SymbolRef TaintedSymbol = nullptr) const;
public:
void checkPreStmt(const DeclStmt *DS, CheckerContext &C) const;
@@ -166,9 +165,8 @@
return nullptr;
// Check if the size is tainted.
- if (isTainted(State, SizeV)) {
- reportBug(VLA_Tainted, SizeE, nullptr, C,
- std::make_unique<TaintBugVisitor>(SizeV));
+ if (SymbolRef TSR = isTainted(State, SizeV)) {
+ reportBug(VLA_Tainted, SizeE, nullptr, C, TSR);
return nullptr;
}
@@ -209,17 +207,24 @@
return State;
}
-void VLASizeChecker::reportBug(
- VLASize_Kind Kind, const Expr *SizeE, ProgramStateRef State,
- CheckerContext &C, std::unique_ptr<BugReporterVisitor> Visitor) const {
+void VLASizeChecker::reportBug(VLASize_Kind Kind, const Expr *SizeE,
+ ProgramStateRef State, CheckerContext &C,
+ SymbolRef TaintedSymbol) const {
// Generate an error node.
ExplodedNode *N = C.generateErrorNode(State);
if (!N)
return;
- if (!BT)
- BT.reset(new BuiltinBug(
- this, "Dangerous variable-length array (VLA) declaration"));
+ if (!BT) {
+ if (Kind == VLA_Tainted)
+ BT.reset(new BugType(this,
+ "Dangerous variable-length array (VLA) declaration",
+ categories::TaintedData));
+ else
+ BT.reset(new BugType(this,
+ "Dangerous variable-length array (VLA) declaration",
+ categories::LogicError));
+ }
SmallString<256> buf;
llvm::raw_svector_ostream os(buf);
@@ -243,9 +248,11 @@
}
auto report = std::make_unique<PathSensitiveBugReport>(*BT, os.str(), N);
- report->addVisitor(std::move(Visitor));
report->addRange(SizeE->getSourceRange());
bugreporter::trackExpressionValue(N, SizeE, *report);
+ if (TaintedSymbol)
+ report->markInteresting(TaintedSymbol);
+
C.emitReport(std::move(report));
}
Index: clang/lib/StaticAnalyzer/Checkers/Taint.cpp
===================================================================
--- clang/lib/StaticAnalyzer/Checkers/Taint.cpp
+++ clang/lib/StaticAnalyzer/Checkers/Taint.cpp
@@ -144,30 +144,33 @@
return NewState;
}
-bool taint::isTainted(ProgramStateRef State, const Stmt *S,
- const LocationContext *LCtx, TaintTagType Kind) {
+SymbolRef taint::isTainted(ProgramStateRef State, const Stmt *S,
+ const LocationContext *LCtx, TaintTagType Kind) {
SVal val = State->getSVal(S, LCtx);
return isTainted(State, val, Kind);
}
-bool taint::isTainted(ProgramStateRef State, SVal V, TaintTagType Kind) {
+SymbolRef taint::isTainted(ProgramStateRef State, SVal V, TaintTagType Kind) {
if (SymbolRef Sym = V.getAsSymbol())
return isTainted(State, Sym, Kind);
if (const MemRegion *Reg = V.getAsRegion())
return isTainted(State, Reg, Kind);
- return false;
+ return nullptr;
}
-bool taint::isTainted(ProgramStateRef State, const MemRegion *Reg,
- TaintTagType K) {
+SymbolRef taint::isTainted(ProgramStateRef State, const MemRegion *Reg,
+ TaintTagType K) {
if (!Reg)
- return false;
+ return nullptr;
// Element region (array element) is tainted if either the base or the offset
// are tainted.
- if (const ElementRegion *ER = dyn_cast<ElementRegion>(Reg))
- return isTainted(State, ER->getSuperRegion(), K) ||
- isTainted(State, ER->getIndex(), K);
+ if (const ElementRegion *ER = dyn_cast<ElementRegion>(Reg)) {
+ if (SymbolRef TaintedSym = isTainted(State, ER->getIndex(), K))
+ return TaintedSym;
+ if (SymbolRef TaintedSym = isTainted(State, ER->getSuperRegion(), K))
+ return TaintedSym;
+ }
if (const SymbolicRegion *SR = dyn_cast<SymbolicRegion>(Reg))
return isTainted(State, SR->getSymbol(), K);
@@ -175,12 +178,13 @@
if (const SubRegion *ER = dyn_cast<SubRegion>(Reg))
return isTainted(State, ER->getSuperRegion(), K);
- return false;
+ return nullptr;
}
-bool taint::isTainted(ProgramStateRef State, SymbolRef Sym, TaintTagType Kind) {
+SymbolRef taint::isTainted(ProgramStateRef State, SymbolRef Sym,
+ TaintTagType Kind) {
if (!Sym)
- return false;
+ return nullptr;
// Traverse all the symbols this symbol depends on to see if any are tainted.
for (SymExpr::symbol_iterator SI = Sym->symbol_begin(),
@@ -190,14 +194,15 @@
continue;
if (const TaintTagType *Tag = State->get<TaintMap>(*SI)) {
- if (*Tag == Kind)
- return true;
+ if (*Tag == Kind) {
+ return *SI;
+ }
}
if (const auto *SD = dyn_cast<SymbolDerived>(*SI)) {
// If this is a SymbolDerived with a tainted parent, it's also tainted.
- if (isTainted(State, SD->getParentSymbol(), Kind))
- return true;
+ if (SymbolRef TSR = isTainted(State, SD->getParentSymbol(), Kind))
+ return TSR;
// If this is a SymbolDerived with the same parent symbol as another
// tainted SymbolDerived and a region that's a sub-region of that tainted
@@ -210,46 +215,25 @@
// complete. For example, this would not currently identify
// overlapping fields in a union as tainted. To identify this we can
// check for overlapping/nested byte offsets.
- if (Kind == I.second && R->isSubRegionOf(I.first))
- return true;
+ if (Kind == I.second && R->isSubRegionOf(I.first)) {
+ return SD->getParentSymbol();
+ }
}
}
}
// If memory region is tainted, data is also tainted.
if (const auto *SRV = dyn_cast<SymbolRegionValue>(*SI)) {
- if (isTainted(State, SRV->getRegion(), Kind))
- return true;
+ if (SymbolRef TaintedSym = isTainted(State, SRV->getRegion(), Kind))
+ return TaintedSym;
}
// If this is a SymbolCast from a tainted value, it's also tainted.
if (const auto *SC = dyn_cast<SymbolCast>(*SI)) {
- if (isTainted(State, SC->getOperand(), Kind))
- return true;
+ if (SymbolRef TaintedSym = isTainted(State, SC->getOperand(), Kind))
+ return TaintedSym;
}
}
- return false;
-}
-
-PathDiagnosticPieceRef TaintBugVisitor::VisitNode(const ExplodedNode *N,
- BugReporterContext &BRC,
- PathSensitiveBugReport &BR) {
-
- // Find the ExplodedNode where the taint was first introduced
- if (!isTainted(N->getState(), V) ||
- isTainted(N->getFirstPred()->getState(), V))
- return nullptr;
-
- const Stmt *S = N->getStmtForDiagnostics();
- if (!S)
- return nullptr;
-
- const LocationContext *NCtx = N->getLocationContext();
- PathDiagnosticLocation L =
- PathDiagnosticLocation::createBegin(S, BRC.getSourceManager(), NCtx);
- if (!L.isValid() || !L.asLocation().isValid())
- return nullptr;
-
- return std::make_shared<PathDiagnosticEventPiece>(L, "Taint originated here");
+ return nullptr;
}
Index: clang/lib/StaticAnalyzer/Checkers/GenericTaintChecker.cpp
===================================================================
--- clang/lib/StaticAnalyzer/Checkers/GenericTaintChecker.cpp
+++ clang/lib/StaticAnalyzer/Checkers/GenericTaintChecker.cpp
@@ -32,6 +32,7 @@
#include <memory>
#include <optional>
#include <utility>
+#include <vector>
#define DEBUG_TYPE "taint-checker"
@@ -114,47 +115,111 @@
return false;
}
-SVal getPointeeOf(const CheckerContext &C, Loc LValue) {
- const QualType ArgTy = LValue.getType(C.getASTContext());
+SVal getPointeeOf(ASTContext &ASTCtx, const ProgramStateRef State, Loc LValue) {
+ const QualType ArgTy = LValue.getType(ASTCtx);
if (!ArgTy->isPointerType() || !ArgTy->getPointeeType()->isVoidType())
- return C.getState()->getSVal(LValue);
+ return State->getSVal(LValue);
// Do not dereference void pointers. Treat them as byte pointers instead.
// FIXME: we might want to consider more than just the first byte.
- return C.getState()->getSVal(LValue, C.getASTContext().CharTy);
+ return State->getSVal(LValue, ASTCtx.CharTy);
}
/// Given a pointer/reference argument, return the value it refers to.
-std::optional<SVal> getPointeeOf(const CheckerContext &C, SVal Arg) {
+std::optional<SVal> getPointeeOf(ASTContext &ASTCtx,
+ const ProgramStateRef State, SVal Arg) {
if (auto LValue = Arg.getAs<Loc>())
- return getPointeeOf(C, *LValue);
+ return getPointeeOf(ASTCtx, State, *LValue);
return std::nullopt;
}
/// Given a pointer, return the SVal of its pointee or if it is tainted,
/// otherwise return the pointer's SVal if tainted.
/// Also considers stdin as a taint source.
-std::optional<SVal> getTaintedPointeeOrPointer(const CheckerContext &C,
+std::optional<SVal> getTaintedPointeeOrPointer(ASTContext &ASTCtx,
+ const ProgramStateRef State,
SVal Arg) {
- const ProgramStateRef State = C.getState();
-
- if (auto Pointee = getPointeeOf(C, Arg))
+ if (auto Pointee = getPointeeOf(ASTCtx, State, Arg))
if (isTainted(State, *Pointee)) // FIXME: isTainted(...) ? Pointee : None;
return Pointee;
if (isTainted(State, Arg))
return Arg;
+ return std::nullopt;
+}
- // FIXME: This should be done by the isTainted() API.
- if (isStdin(Arg, C.getASTContext()))
- return Arg;
+bool isTaintedOrPointsToTainted(ASTContext &ASTCtx,
+ const ProgramStateRef &State, SVal ExprSVal) {
+ return getTaintedPointeeOrPointer(ASTCtx, State, ExprSVal).has_value();
+}
- return std::nullopt;
+/// Helps in printing taint diagnostics.
+/// Marks the incoming parameters of a function interesting (to be printed)
+/// when the return value, or the outgoing parameters are tainted.
+const NoteTag *taintOriginTrackerTag(CheckerContext &C,
+ std::vector<SymbolRef> TaintedSymbols,
+ std::vector<ArgIdxTy> TaintedArgs,
+ const LocationContext *CallLocation) {
+ assert(TaintedSymbols.size() == TaintedArgs.size());
+ return C.getNoteTag([TaintedSymbols = std::move(TaintedSymbols),
+ TaintedArgs = std::move(TaintedArgs), CallLocation](
+ PathSensitiveBugReport &BR) -> std::string {
+ SmallString<256> Msg;
+ // We give diagnostics only for taint related reports
+ if (!BR.isInteresting(CallLocation) ||
+ BR.getBugType().getCategory() != categories::TaintedData) {
+ return "";
+ }
+ if (TaintedSymbols.empty()) {
+ return "Taint originated here";
+ }
+ for (auto Sym : llvm::enumerate(TaintedSymbols)) {
+ LLVM_DEBUG(llvm::dbgs() << "Taint Propagated from argument "
+ << TaintedArgs.at(Sym.index()) + 1 << "\n");
+ BR.markInteresting(Sym.value());
+ }
+ return "";
+ });
}
-bool isTaintedOrPointsToTainted(const Expr *E, const ProgramStateRef &State,
- CheckerContext &C) {
- return getTaintedPointeeOrPointer(C, C.getSVal(E)).has_value();
+/// Helps in printing taint diagnostics.
+/// Marks the function interesting (to be printed)
+/// when the return value, or the outgoing parameters are tainted.
+const NoteTag *taintPropagationExplainerTag(
+ CheckerContext &C, std::vector<SymbolRef> TaintedSymbols,
+ std::vector<ArgIdxTy> TaintedArgs, const LocationContext *CallLocation) {
+ assert(TaintedSymbols.size() == TaintedArgs.size());
+ return C.getNoteTag([TaintedSymbols = std::move(TaintedSymbols),
+ TaintedArgs = std::move(TaintedArgs), CallLocation](
+ PathSensitiveBugReport &BR) -> std::string {
+ SmallString<256> Msg;
+ llvm::raw_svector_ostream Out(Msg);
+ // We give diagnostics only for taint related reports
+ if (TaintedSymbols.empty() ||
+ BR.getBugType().getCategory() != categories::TaintedData) {
+ return "";
+ }
+ int nofTaintedArgs = 0;
+ for (auto Sym : llvm::enumerate(TaintedSymbols)) {
+ if (BR.isInteresting(Sym.value())) {
+ BR.markInteresting(CallLocation);
+ if (TaintedArgs.at(Sym.index()) != ReturnValueIndex) {
+ LLVM_DEBUG(llvm::dbgs() << "Taint Propagated to argument "
+ << TaintedArgs.at(Sym.index()) + 1 << "\n");
+ if (nofTaintedArgs == 0)
+ Out << "Taint propagated to argument "
+ << TaintedArgs.at(Sym.index()) + 1;
+ else
+ Out << ", " << TaintedArgs.at(Sym.index()) + 1;
+ nofTaintedArgs++;
+ } else {
+ LLVM_DEBUG(llvm::dbgs() << "Taint Propagated to return value.\n");
+ Out << "Taint propagated to the return value";
+ }
+ }
+ }
+ return std::string(Out.str());
+ });
}
/// ArgSet is used to describe arguments relevant for taint detection or
@@ -193,7 +258,7 @@
ArgSet SinkArgs;
/// Arguments which should be sanitized on function return.
ArgSet FilterArgs;
- /// Arguments which can participate in taint propagationa. If any of the
+ /// Arguments which can participate in taint propagation. If any of the
/// arguments in PropSrcArgs is tainted, all arguments in PropDstArgs should
/// be tainted.
ArgSet PropSrcArgs;
@@ -343,7 +408,7 @@
CheckerContext &C) const;
private:
- const BugType BT{this, "Use of Untrusted Data", "Untrusted Data"};
+ const BugType BT{this, "Use of Untrusted Data", categories::TaintedData};
bool checkUncontrolledFormatString(const CallEvent &Call,
CheckerContext &C) const;
@@ -351,7 +416,7 @@
void taintUnsafeSocketProtocol(const CallEvent &Call,
CheckerContext &C) const;
- /// Default taint rules are initilized with the help of a CheckerContext to
+ /// Default taint rules are initalized with the help of a CheckerContext to
/// access the names of built-in functions like memcpy.
void initTaintRules(CheckerContext &C) const;
@@ -788,22 +853,42 @@
llvm::dbgs() << "> actually wants to taint arg index: " << I << '\n';
});
+ const NoteTag *InjectionTag = nullptr;
+ std::vector<SymbolRef> TaintedSymbols;
+ std::vector<ArgIdxTy> TaintedIndexes;
for (ArgIdxTy ArgNum : *TaintArgs) {
// Special handling for the tainted return value.
if (ArgNum == ReturnValueIndex) {
State = addTaint(State, Call.getReturnValue());
+ SymbolRef TaintedSym = isTainted(State, Call.getReturnValue());
+ if (TaintedSym) {
+ TaintedSymbols.push_back(TaintedSym);
+ TaintedIndexes.push_back(ArgNum);
+ }
continue;
}
-
// The arguments are pointer arguments. The data they are pointing at is
// tainted after the call.
- if (auto V = getPointeeOf(C, Call.getArgSVal(ArgNum)))
+ if (auto V =
+ getPointeeOf(C.getASTContext(), State, Call.getArgSVal(ArgNum))) {
State = addTaint(State, *V);
+ // FIXME: The call argument may be a complex expression
+ // referring to multiple tainted variables.
+ // Now we generate notes and track back only one of them.
+ SymbolRef TaintedSym = isTainted(State, *V);
+ if (TaintedSym) {
+ TaintedSymbols.push_back(TaintedSym);
+ TaintedIndexes.push_back(ArgNum);
+ }
+ }
}
-
+ // Create a NoteTag callback, which prints to the user where the taintedness
+ // was propagated to.
+ InjectionTag = taintPropagationExplainerTag(C, TaintedSymbols, TaintedIndexes,
+ Call.getCalleeStackFrame(0));
// Clear up the taint info from the state.
State = State->remove<TaintArgsOnPostVisit>(CurrentFrame);
- C.addTransition(State);
+ C.addTransition(State, InjectionTag);
}
void GenericTaintChecker::printState(raw_ostream &Out, ProgramStateRef State,
@@ -826,7 +911,12 @@
/// Check for taint sinks.
ForEachCallArg([this, &Checker, &C, &State](ArgIdxTy I, const Expr *E, SVal) {
- if (SinkArgs.contains(I) && isTaintedOrPointsToTainted(E, State, C))
+ // Add taintedness to stdin parameters
+ if (isStdin(C.getSVal(E), C.getASTContext())) {
+ State = addTaint(State, C.getSVal(E));
+ }
+ if (SinkArgs.contains(I) &&
+ isTaintedOrPointsToTainted(C.getASTContext(), State, C.getSVal(E)))
Checker.generateReportIfTainted(E, SinkMsg.value_or(MsgCustomSink), C);
});
@@ -834,7 +924,7 @@
ForEachCallArg([this, &C, &State](ArgIdxTy I, const Expr *E, SVal S) {
if (FilterArgs.contains(I)) {
State = removeTaint(State, S);
- if (auto P = getPointeeOf(C, S))
+ if (auto P = getPointeeOf(C.getASTContext(), State, S))
State = removeTaint(State, *P);
}
});
@@ -843,11 +933,25 @@
/// A rule is relevant if PropSrcArgs is empty, or if any of its signified
/// args are tainted in context of the current CallEvent.
bool IsMatching = PropSrcArgs.isEmpty();
- ForEachCallArg(
- [this, &C, &IsMatching, &State](ArgIdxTy I, const Expr *E, SVal) {
- IsMatching = IsMatching || (PropSrcArgs.contains(I) &&
- isTaintedOrPointsToTainted(E, State, C));
- });
+ std::vector<SymbolRef> TaintedSymbols;
+ std::vector<ArgIdxTy> TaintedIndexes;
+ ForEachCallArg([this, &C, &IsMatching, &State, &TaintedSymbols,
+ &TaintedIndexes](ArgIdxTy I, const Expr *E, SVal) {
+ IsMatching =
+ IsMatching ||
+ (PropSrcArgs.contains(I) &&
+ isTaintedOrPointsToTainted(C.getASTContext(), State, C.getSVal(E)));
+ std::optional<SVal> TaintedSVal =
+ getTaintedPointeeOrPointer(C.getASTContext(), State, C.getSVal(E));
+
+ // We track back tainted arguments except for stdin
+ if (TaintedSVal && !isStdin(*TaintedSVal, C.getASTContext())) {
+ if (SymbolRef SR = isTainted(State, *TaintedSVal)) {
+ TaintedSymbols.push_back(SR);
+ TaintedIndexes.push_back(I);
+ }
+ }
+ });
if (!IsMatching)
return;
@@ -890,7 +994,9 @@
if (!Result.isEmpty())
State = State->set<TaintArgsOnPostVisit>(C.getStackFrame(), Result);
- C.addTransition(State);
+ const NoteTag *InjectionTag = taintOriginTrackerTag(
+ C, TaintedSymbols, TaintedIndexes, Call.getCalleeStackFrame(0));
+ C.addTransition(State, InjectionTag);
}
bool GenericTaintRule::UntrustedEnv(CheckerContext &C) {
@@ -902,7 +1008,8 @@
bool GenericTaintChecker::generateReportIfTainted(const Expr *E, StringRef Msg,
CheckerContext &C) const {
assert(E);
- std::optional<SVal> TaintedSVal{getTaintedPointeeOrPointer(C, C.getSVal(E))};
+ std::optional<SVal> TaintedSVal{getTaintedPointeeOrPointer(
+ C.getASTContext(), C.getState(), C.getSVal(E))};
if (!TaintedSVal)
return false;
@@ -911,7 +1018,9 @@
if (ExplodedNode *N = C.generateNonFatalErrorNode()) {
auto report = std::make_unique<PathSensitiveBugReport>(BT, Msg, N);
report->addRange(E->getSourceRange());
- report->addVisitor(std::make_unique<TaintBugVisitor>(*TaintedSVal));
+ if (SymbolRef TaintedSym = isTainted(C.getState(), *TaintedSVal))
+ report->markInteresting(TaintedSym);
+
C.emitReport(std::move(report));
return true;
}
Index: clang/lib/StaticAnalyzer/Checkers/DivZeroChecker.cpp
===================================================================
--- clang/lib/StaticAnalyzer/Checkers/DivZeroChecker.cpp
+++ clang/lib/StaticAnalyzer/Checkers/DivZeroChecker.cpp
@@ -25,9 +25,9 @@
namespace {
class DivZeroChecker : public Checker< check::PreStmt<BinaryOperator> > {
- mutable std::unique_ptr<BuiltinBug> BT;
- void reportBug(const char *Msg, ProgramStateRef StateZero, CheckerContext &C,
- std::unique_ptr<BugReporterVisitor> Visitor = nullptr) const;
+ mutable std::unique_ptr<BugType> BT;
+ void reportBug(StringRef Msg, StringRef Category, ProgramStateRef StateZero,
+ CheckerContext &C, SymbolRef TaintedSymbol = nullptr) const;
public:
void checkPreStmt(const BinaryOperator *B, CheckerContext &C) const;
@@ -41,16 +41,17 @@
return nullptr;
}
-void DivZeroChecker::reportBug(
- const char *Msg, ProgramStateRef StateZero, CheckerContext &C,
- std::unique_ptr<BugReporterVisitor> Visitor) const {
+void DivZeroChecker::reportBug(StringRef Msg, StringRef Category,
+ ProgramStateRef StateZero, CheckerContext &C,
+ SymbolRef TaintedSymbol) const {
if (ExplodedNode *N = C.generateErrorNode(StateZero)) {
if (!BT)
- BT.reset(new BuiltinBug(this, "Division by zero"));
+ BT.reset(new BugType(this, "Division by zero", Category));
auto R = std::make_unique<PathSensitiveBugReport>(*BT, Msg, N);
- R->addVisitor(std::move(Visitor));
bugreporter::trackExpressionValue(N, getDenomExpr(N), *R);
+ if (TaintedSymbol)
+ R->markInteresting(TaintedSymbol);
C.emitReport(std::move(R));
}
}
@@ -82,15 +83,16 @@
if (!stateNotZero) {
assert(stateZero);
- reportBug("Division by zero", stateZero, C);
+ reportBug("Division by zero", categories::LogicError, stateZero, C);
return;
}
- bool TaintedD = isTainted(C.getState(), *DV);
- if ((stateNotZero && stateZero && TaintedD)) {
- reportBug("Division by a tainted value, possibly zero", stateZero, C,
- std::make_unique<taint::TaintBugVisitor>(*DV));
- return;
+ if ((stateNotZero && stateZero)) {
+ if (SymbolRef TaintedSym = isTainted(C.getState(), *DV)) {
+ reportBug("Division by a tainted value, possibly zero",
+ categories::TaintedData, stateZero, C, TaintedSym);
+ return;
+ }
}
// If we get here, then the denom should not be zero. We abandon the implicit
Index: clang/lib/StaticAnalyzer/Checkers/ArrayBoundCheckerV2.cpp
===================================================================
--- clang/lib/StaticAnalyzer/Checkers/ArrayBoundCheckerV2.cpp
+++ clang/lib/StaticAnalyzer/Checkers/ArrayBoundCheckerV2.cpp
@@ -32,12 +32,12 @@
namespace {
class ArrayBoundCheckerV2 :
public Checker<check::Location> {
- mutable std::unique_ptr<BuiltinBug> BT;
+ mutable std::unique_ptr<BugType> BT;
enum OOB_Kind { OOB_Precedes, OOB_Excedes, OOB_Tainted };
void reportOOB(CheckerContext &C, ProgramStateRef errorState, OOB_Kind kind,
- std::unique_ptr<BugReporterVisitor> Visitor = nullptr) const;
+ SymbolRef TaintedSymbol = nullptr) const;
public:
void checkLocation(SVal l, bool isLoad, const Stmt*S,
@@ -206,9 +206,8 @@
// If we are under constrained and the index variables are tainted, report.
if (state_exceedsUpperBound && state_withinUpperBound) {
SVal ByteOffset = rawOffset.getByteOffset();
- if (isTainted(state, ByteOffset)) {
- reportOOB(checkerContext, state_exceedsUpperBound, OOB_Tainted,
- std::make_unique<TaintBugVisitor>(ByteOffset));
+ if (SymbolRef TSR = isTainted(state, ByteOffset)) {
+ reportOOB(checkerContext, state_exceedsUpperBound, OOB_Tainted, TSR);
return;
}
} else if (state_exceedsUpperBound) {
@@ -227,16 +226,16 @@
checkerContext.addTransition(state);
}
-void ArrayBoundCheckerV2::reportOOB(
- CheckerContext &checkerContext, ProgramStateRef errorState, OOB_Kind kind,
- std::unique_ptr<BugReporterVisitor> Visitor) const {
+void ArrayBoundCheckerV2::reportOOB(CheckerContext &checkerContext,
+ ProgramStateRef errorState, OOB_Kind kind,
+ SymbolRef TaintedSymbol) const {
ExplodedNode *errorNode = checkerContext.generateErrorNode(errorState);
if (!errorNode)
return;
-
- if (!BT)
- BT.reset(new BuiltinBug(this, "Out-of-bound access"));
+ BT.reset(new BugType(this, "Out-of-bound access",
+ kind == OOB_Tainted ? categories::TaintedData
+ : categories::LogicError));
// FIXME: This diagnostics are preliminary. We should get far better
// diagnostics for explaining buffer overruns.
@@ -257,7 +256,9 @@
}
auto BR = std::make_unique<PathSensitiveBugReport>(*BT, os.str(), errorNode);
- BR->addVisitor(std::move(Visitor));
+ if (TaintedSymbol)
+ BR->markInteresting(TaintedSymbol);
+
checkerContext.emitReport(std::move(BR));
}
Index: clang/include/clang/StaticAnalyzer/Core/BugReporter/CommonBugCategories.h
===================================================================
--- clang/include/clang/StaticAnalyzer/Core/BugReporter/CommonBugCategories.h
+++ clang/include/clang/StaticAnalyzer/Core/BugReporter/CommonBugCategories.h
@@ -22,6 +22,7 @@
extern const char *const CXXMoveSemantics;
extern const char *const SecurityError;
extern const char *const UnusedCode;
+extern const char *const TaintedData;
} // namespace categories
} // namespace ento
} // namespace clang
Index: clang/include/clang/StaticAnalyzer/Checkers/Taint.h
===================================================================
--- clang/include/clang/StaticAnalyzer/Checkers/Taint.h
+++ clang/include/clang/StaticAnalyzer/Checkers/Taint.h
@@ -62,43 +62,27 @@
TaintTagType Kind = TaintTagGeneric);
/// Check if the statement has a tainted value in the given state.
-bool isTainted(ProgramStateRef State, const Stmt *S,
- const LocationContext *LCtx,
- TaintTagType Kind = TaintTagGeneric);
+SymbolRef isTainted(ProgramStateRef State, const Stmt *S,
+ const LocationContext *LCtx,
+ TaintTagType Kind = TaintTagGeneric);
/// Check if the value is tainted in the given state.
-bool isTainted(ProgramStateRef State, SVal V,
- TaintTagType Kind = TaintTagGeneric);
+SymbolRef isTainted(ProgramStateRef State, SVal V,
+ TaintTagType Kind = TaintTagGeneric);
/// Check if the symbol is tainted in the given state.
-bool isTainted(ProgramStateRef State, SymbolRef Sym,
- TaintTagType Kind = TaintTagGeneric);
+SymbolRef isTainted(ProgramStateRef State, SymbolRef Sym,
+ TaintTagType Kind = TaintTagGeneric);
/// Check if the pointer represented by the region is tainted in the given
/// state.
-bool isTainted(ProgramStateRef State, const MemRegion *Reg,
- TaintTagType Kind = TaintTagGeneric);
+SymbolRef isTainted(ProgramStateRef State, const MemRegion *Reg,
+ TaintTagType Kind = TaintTagGeneric);
void printTaint(ProgramStateRef State, raw_ostream &Out, const char *nl = "\n",
const char *sep = "");
LLVM_DUMP_METHOD void dumpTaint(ProgramStateRef State);
-
-/// The bug visitor prints a diagnostic message at the location where a given
-/// variable was tainted.
-class TaintBugVisitor final : public BugReporterVisitor {
-private:
- const SVal V;
-
-public:
- TaintBugVisitor(const SVal V) : V(V) {}
- void Profile(llvm::FoldingSetNodeID &ID) const override { ID.Add(V); }
-
- PathDiagnosticPieceRef VisitNode(const ExplodedNode *N,
- BugReporterContext &BRC,
- PathSensitiveBugReport &BR) override;
-};
-
} // namespace taint
} // namespace ento
} // namespace clang
_______________________________________________
cfe-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits