Szelethus created this revision. Szelethus added reviewers: NoQ, george.karpenkov, xazax.hun, rnkovacs, dcoughlin, MTC, dkrupp. Herald added subscribers: cfe-commits, mikhail.ramalho, a.sidorin, szepet, whisperity.
This is the implementation of the inclusion of macro expansions into the plist output. This approach is very much different to what `HTMLRewrite` does -- the motivation behind this was that - I **really** wanted to avoid `const_cast`, - This patch aims to only expand macros relevant to the bugpath, so there's no point in lexing everything. Sadly, it seems like I couldn't get away with a solution not re-implementing parts of the preprocessor -- macro arguments are expanded manually and somewhat painfully, but I am very confident about it's stability and correctness. I included plenty of comments, maybe a little too much, but my reasoning was that the future maintainer of this code probably won't be expert on how to handle the preprocessor and the lexer, so judging from how much I struggled with it, I figured the extra explanation was justified. There are still some tests I'd like to perform, and some small details are still under debate (such as whether the kind of the plist entry will be `macro_expansion` or not), but the actual implementation of how macros are expanded won't be changed (unless I encounter a crash). Repository: rC Clang https://reviews.llvm.org/D52742 Files: include/clang/StaticAnalyzer/Core/AnalyzerOptions.h lib/StaticAnalyzer/Core/AnalyzerOptions.cpp lib/StaticAnalyzer/Core/BugReporter.cpp lib/StaticAnalyzer/Core/PlistDiagnostics.cpp test/Analysis/plist-macros-with-expansion.cpp
Index: test/Analysis/plist-macros-with-expansion.cpp =================================================================== --- /dev/null +++ test/Analysis/plist-macros-with-expansion.cpp @@ -0,0 +1,152 @@ +// RUN: %clang_analyze_cc1 -analyzer-checker=core -verify %s +// +// RUN: %clang_analyze_cc1 -analyzer-checker=core %s \ +// RUN: -analyzer-output=plist -o %t.plist \ +// RUN: -analyzer-config expand-macros=true \ +// RUN: +// RUN: FileCheck --input-file=%t.plist %s + +void print(const void*); + +//===----------------------------------------------------------------------===// +// Tests for non-function-like macro expansions. +//===----------------------------------------------------------------------===// + +#define SET_PTR_VAR_TO_NULL \ + ptr = 0 + +void nonFunctionLikeMacroTest() { + int *ptr; + SET_PTR_VAR_TO_NULL; + *ptr = 5; // expected-warning{{Dereference of null pointer}} +} + +// CHECK: <string>Expanding macro 'SET_PTR_VAR_TO_NULL' to 'ptr = 0 '</string> + +#define NULL 0 +#define SET_PTR_VAR_TO_NULL_WITH_NESTED_MACRO \ + ptr = NULL + +void nonFunctionLikeNestedMacroTest() { + int *ptr; + SET_PTR_VAR_TO_NULL_WITH_NESTED_MACRO; + *ptr = 5; // expected-warning{{Dereference of null pointer}} +} + +// CHECK: <string>Expanding macro 'SET_PTR_VAR_TO_NULL' to 'ptr = 0 '</string> + +//===----------------------------------------------------------------------===// +// Tests for function-like macro expansions. +//===----------------------------------------------------------------------===// + +void setToNull(int **vptr) { + *vptr = nullptr; +} + +#define TO_NULL(x) \ + setToNull(x) + +void functionLikeMacroTest() { + int *ptr; + TO_NULL(&ptr); + *ptr = 5; // expected-warning{{Dereference of null pointer}} +} + +// CHECK: <string>Expanding macro 'TO_NULL' to 'setToNull ( & a ) '</string> + +#define DOES_NOTHING(x) \ + { \ + int b; \ + b = 5; \ + } \ + print(x) + +#define DEREF(x) \ + DOES_NOTHING(x); \ + *x + +void functionLikeNestedMacroTest() { + int *a; + TO_NULL(&a); + DEREF(a) = 5; // expected-warning{{Dereference of null pointer}} +} + +// CHECK: <string>Expanding macro 'TO_NULL' to 'setToNull ( & a ) '</string> + +// CHECK: <string>Expanding macro 'DEREF' to '{ int b ; b = 5 ; } print ( a ) ; * a '</string> + +//===----------------------------------------------------------------------===// +// Tests for macro arguments containing commas and parantheses. +// +// As of writing these tests, the algorithm expands macro arguments by lexing +// the macro's expansion location, and relies on finding tok::comma and +// tok::l_paren/tok::r_paren. +//===----------------------------------------------------------------------===// + +// Note that this commas, parantheses in strings aren't parsed as tok::comma or +// tok::l_paren/tok::r_paren, but why not test them. + +#define TO_NULL_AND_PRINT(x, str) \ + x = 0; \ + print(str) + +void macroArgContainsCommaInStringTest() { + int *a; + TO_NULL_AND_PRINT(a, "Will this , cause a crash?"); + *a = 5; // expected-warning{{Dereference of null pointer}} +} + +// CHECK: <string>Expanding macro 'TO_NULL_AND_PRINT' to 'a = 0 ; print ( "Will this , cause a crash?" ) '</string> + +void macroArgContainsLParenInStringTest() { + int *a; + TO_NULL_AND_PRINT(a, "Will this ( cause a crash?"); + *a = 5; // expected-warning{{Dereference of null pointer}} +} + +// CHECK: <string>Expanding macro 'TO_NULL_AND_PRINT' to 'a = 0 ; print ( "Will this ( cause a crash?" ) '</string> + +void macroArgContainsRParenInStringTest() { + int *a; + TO_NULL_AND_PRINT(a, "Will this ) cause a crash?"); + *a = 5; // expected-warning{{Dereference of null pointer}} +} + +// CHECK: <string>Expanding macro 'TO_NULL_AND_PRINT' to 'a = 0 ; print ( "Will this ) cause a crash?" ) '</string> + +#define CALL_FUNCTION(funcCall) \ + funcCall + +// Function calls do contain both tok::comma and tok::l_paren/tok::r_paren. + +void macroArgContainsLParenRParenTest() { + int *a; + CALL_FUNCTION(setToNull(&a)); + *a = 5; // expected-warning{{Dereference of null pointer}} +} + +// CHECK: <string>Expanding macro 'CALL_FUNCTION' to 'setToNull ( & a ) '</string> + +void setToNullAndPrint(int **vptr, const char *str) { + setToNull(vptr); + print(str); +} + +void macroArgContainsCommaLParenRParenTest() { + int *a; + CALL_FUNCTION(setToNullAndPrint(&a, "Hello!")); + *a = 5; // expected-warning{{Dereference of null pointer}} +} + +// CHECK: <string>Expanding macro 'CALL_FUNCTION' to 'setToNullAndPrint ( & a , "Hello!" ) '</string> + +#define CALL_FUNCTION_WITH_TWO_PARAMS(funcCall, param1, param2) \ + funcCall(param1, param2) + +void macroArgContainsCommaLParenRParenTest2() { + int *a; + CALL_FUNCTION_WITH_TWO_PARAMS(setToNullAndPrint, &a, "Hello!"); + *a = 5; // expected-warning{{Dereference of null pointer}} +} + +// CHECK: <string>Expanding macro 'CALL_FUNCTION_WITH_TWO_PARAMS' to 'setToNullAndPrint ( & a , "Hello!" ) '</string> Index: lib/StaticAnalyzer/Core/PlistDiagnostics.cpp =================================================================== --- lib/StaticAnalyzer/Core/PlistDiagnostics.cpp +++ lib/StaticAnalyzer/Core/PlistDiagnostics.cpp @@ -253,15 +253,63 @@ /*includeControlFlow*/ true); } +namespace { + +struct ExpansionInfo { + std::string MacroName; + std::string Expansion; + ExpansionInfo(std::string N, std::string E) : MacroName(N), Expansion(E) {} +}; + +} // end of anonymous namespace + +static ExpansionInfo getExpandedMacro(SourceLocation MacroLoc, + const Preprocessor &PP); + static void ReportMacro(raw_ostream &o, - const PathDiagnosticMacroPiece& P, - const FIDMap& FM, + const PathDiagnosticMacroPiece &P, + const FIDMap &FM, const Preprocessor &PP, unsigned indent, unsigned depth) { + const SourceManager &SM = PP.getSourceManager(); - for (PathPieces::const_iterator I = P.subPieces.begin(), E=P.subPieces.end(); - I!=E; ++I) { + llvm::SmallString<50> MacroMessage; + llvm::raw_svector_ostream MacroOS(MacroMessage); + + { + ExpansionInfo EI = getExpandedMacro(P.getLocation().asLocation(), PP); + + MacroOS << "Expanding macro '" << EI.MacroName << "' to '" + << EI.Expansion << '\''; + } + + Indent(o, indent) << "<dict>\n"; + ++indent; + + Indent(o, indent) << "<key>kind</key><string>macro_expansion</string>\n"; + + // Output the location. + FullSourceLoc L = P.getLocation().asLocation(); + + Indent(o, indent) << "<key>location</key>\n"; + EmitLocation(o, SM, L, FM, indent); + + // Output the ranges (if any). + ArrayRef<SourceRange> Ranges = P.getRanges(); + EmitRanges(o, Ranges, FM, PP, indent); + + // Output the text. + EmitMessage(o, MacroOS.str(), indent); + + // Finish up. + --indent; + Indent(o, indent); + o << "</dict>\n"; + + for (PathPieces::const_iterator I = P.subPieces.begin(), + E = P.subPieces.end(); + I != E; ++I) { ReportPiece(o, **I, FM, PP, indent, depth, /*includeControlFlow*/ false); } } @@ -606,3 +654,251 @@ // Finish. o << "</dict>\n</plist>"; } + +//===----------------------------------------------------------------------===// +// Helper functions and data structures for expanding macros. +//===----------------------------------------------------------------------===// + +namespace { + +using ExpArgTokens = llvm::SmallVector<Token, 2>; + +/// Maps unexpanded macro arguments to expanded arguments. A macro argument may +/// need to expanded further when it is nested inside another macro. +class MacroArgsInfo : public std::map<const IdentifierInfo *, ExpArgTokens> { +public: + void expandFromPrevMacro(const MacroArgsInfo &Super); +}; + +struct MacroNameAndArgsInfo { + std::string Name; + const MacroInfo *MI = nullptr; + llvm::Optional<MacroArgsInfo> MacroArgMap; + + MacroNameAndArgsInfo(std::string N, const MacroInfo *MI, + llvm::Optional<MacroArgsInfo> M) + : Name(N), MI(MI), MacroArgMap(M) {} +}; + +} // end of anonymous namespace + +/// Retrieves the name of the macro and what it's arguments expand into +/// at \p ExpanLoc. +/// +/// For example, for the following macro expansion: +/// +/// #define SET_TO_NULL(x) x = 0 +/// #define NOT_SUSPICIOUS(a) \ +/// { \ +/// int b = 0; \ +/// } \ +/// SET_TO_NULL(a) +/// +/// int *ptr = new int(4); +/// NOT_SUSPICIOUS(&ptr); +/// *ptr = 5; +/// +/// When \p ExpanLoc references the last line, the macro name "NOT_SUSPICIOUS" +/// and the MacroArgsInfo map { (a, &ptr) } will be returned. +/// +/// When \p ExpanLoc references "SET_TO_NULL(a)" within the definition of +/// "NOT_SUSPICOUS", the macro name "SET_TO_NULL" and the MacroArgsInfo map +/// { (x, a) } will be returned. +static MacroNameAndArgsInfo getMacroNameAndArgsInfo(SourceLocation ExpanLoc, + const Preprocessor &PP); + +static ExpansionInfo getExpandedMacroImpl(SourceLocation MacroLoc, + const Preprocessor &PP, + MacroArgsInfo *PrevArgMap) { + + const SourceManager &SM = PP.getSourceManager(); + + MacroNameAndArgsInfo Info = getMacroNameAndArgsInfo( + SM.getExpansionLoc(MacroLoc), PP); + + std::string MacroName = std::move(Info.Name); + const MacroInfo *MI = Info.MI; + llvm::Optional<MacroArgsInfo> MacroArgMap = std::move(Info.MacroArgMap); + + // If this macro function-like. + if (MacroArgMap) { + // If this macro is nested inside another one, let's manually expand it's + // arguments from the "super" macro. + if (PrevArgMap) + MacroArgMap->expandFromPrevMacro(*PrevArgMap); + PrevArgMap = MacroArgMap.getPointer(); + } else + PrevArgMap = nullptr; + + // Iterate over the macro's tokens and stringify them. + llvm::SmallString<200> ExpansionBuf; + llvm::raw_svector_ostream ExpansionOS(ExpansionBuf); + + for (auto It = MI->tokens_begin(), E = MI->tokens_end(); It != E;) { + Token T = *It; + + if (T.is(tok::identifier)) { + const auto *II = T.getIdentifierInfo(); + assert(II && + "This token is an identifier but has no IdentifierInfo!"); + + // If this token is a macro that should be expanded inside the currect + // macro. + if (const MacroInfo *MI = PP.getMacroInfo(II)) { + ExpansionOS << getExpandedMacroImpl( + T.getLocation(), PP, PrevArgMap).Expansion; + + // If this is a function-like macro, skip its arguments, as + // getExpandedMacro() already printed them. + ++It; + if (It->is(tok::l_paren)) { + while ((++It)->isNot(tok::r_paren)) { + assert(It->isNot(tok::eof)); + } + ++It; + } + continue; + } + + // If this token is the current macro's argument, we should expand it. + if (MacroArgMap && MacroArgMap->count(II)) { + for (Token ExpandedArgT : MacroArgMap->at(II)) { + ExpansionOS << PP.getSpelling(ExpandedArgT) + ' '; + } + ++It; + continue; + } + } + + // If control reaches here, there's nothing left to do, print the token. + ExpansionOS << PP.getSpelling(T) + ' '; + ++It; + } + + return {MacroName, ExpansionOS.str()}; +} + +static ExpansionInfo getExpandedMacro(SourceLocation MacroLoc, + const Preprocessor &PP) { + return getExpandedMacroImpl(MacroLoc, PP, /* PrevArgMap */ nullptr); +} + +static MacroNameAndArgsInfo getMacroNameAndArgsInfo(SourceLocation ExpanLoc, + const Preprocessor &PP) { + + const SourceManager &SM = PP.getSourceManager(); + const LangOptions &LangOpts = PP.getLangOpts(); + + // First, we create a Lexer to lex *at the expansion location* the tokens + // referring to the macro's name and its arguments. + StringRef BufferInfo = SM.getBufferData(SM.getFileID(ExpanLoc)); + std::pair<FileID, unsigned> LocInfo = SM.getDecomposedLoc(ExpanLoc); + const char *MacroNameBuf = LocInfo.second + BufferInfo.data(); + + Lexer RawLexer(SM.getLocForStartOfFile(LocInfo.first), LangOpts, + BufferInfo.begin(), MacroNameBuf, BufferInfo.end()); + + // Acquire the macro's name. + Token TheTok; + RawLexer.LexFromRawLexer(TheTok); + + StringRef MacroName(MacroNameBuf, TheTok.getLength()); + + // Acquire the macro's arguments. + // + // The rough idea here is to lex from the first left parantheses to the last + // right parantheses, and map the macro's unexpanded arguments to what they + // will be expanded to. An expanded macro argument may contain several tokens + // (like '3 + 4'), so we'll lex until we find a tok::comma or tok::r_paren, at + // which point we start lexing the next argument or finish. + const auto *II = PP.getIdentifierInfo(MacroName); + assert(II && "Failed to acquire the IndetifierInfo for the macro!"); + const MacroInfo *MI = PP.getMacroInfo(II); + assert(MI && "This IdentifierInfo should refer to a macro!"); + + ArrayRef<const IdentifierInfo *> MacroArgs = MI->params(); + if (MacroArgs.empty()) + return { MacroName, MI, None }; + + RawLexer.LexFromRawLexer(TheTok); + assert(TheTok.is(tok::l_paren) && + "The token after the macro's identifier token should be '('!"); + + MacroArgsInfo MAI; + unsigned Index = 0; + + // When the macro's argument is a function call, like + // CALL_FN(someFunctionName(param1, param2)) + // we will find tok::l_paren, tok::r_paren, and tok::comma that do not divide + // actual macro arguments, or do not represent the macro argument's closing + // parantheses, so we'll count how many parantheses aren't closed yet. + int ParanthesesDepth = 1; + + while (Index < MI->getNumParams()) { + MacroArgsInfo::mapped_type ExpandedArgTokens; + + // Lex the first token of the next macro parameter. + RawLexer.LexFromRawLexer(TheTok); + + while (TheTok.isNot(tok::comma) || ParanthesesDepth != 1) { + assert(TheTok.isNot(tok::eof) && + "EOF encountered while looking for expanded macro args!"); + + if (TheTok.is(tok::l_paren)) + ++ParanthesesDepth; + + if (TheTok.is(tok::r_paren)) + --ParanthesesDepth; + + if (ParanthesesDepth == 0) + break; + + if (TheTok.is(tok::raw_identifier)) + PP.LookUpIdentifierInfo(TheTok); + + ExpandedArgTokens.push_back(TheTok); + RawLexer.LexFromRawLexer(TheTok); + } + + MAI.insert(std::make_pair(MacroArgs[Index++], ExpandedArgTokens)); + } + + assert(TheTok.is(tok::r_paren) && + "Expanded macro argument acquisition failed! After the end of the loop" + " this token should be ')'!"); + + return {MacroName, MI, MAI}; +} + +void MacroArgsInfo::expandFromPrevMacro(const MacroArgsInfo &Super) { + + for (value_type &Pair : *this) { + ExpArgTokens &CurrExpArgTokens = Pair.second; + + // For each token in the expanded macro argument. + auto It = CurrExpArgTokens.begin(); + while (It != CurrExpArgTokens.end()) { + if (It->isNot(tok::identifier)) { + ++It; + continue; + } + + const auto *II = It->getIdentifierInfo(); + assert(II); + + // Is this an argument that "Super" expands further? + if (!Super.count(II)) { + ++It; + continue; + } + + const ExpArgTokens &SuperExpArgTokens = Super.at(II); + assert(!SuperExpArgTokens.empty()); + + It = CurrExpArgTokens.insert( + It, SuperExpArgTokens.begin(), SuperExpArgTokens.end()); + std::advance(It, SuperExpArgTokens.size()); + It = CurrExpArgTokens.erase(It); + } + } +} Index: lib/StaticAnalyzer/Core/BugReporter.cpp =================================================================== --- lib/StaticAnalyzer/Core/BugReporter.cpp +++ lib/StaticAnalyzer/Core/BugReporter.cpp @@ -546,7 +546,8 @@ } } -static void CompactPathDiagnostic(PathPieces &path, const SourceManager& SM); +static void CompactMacroExpandedPieces(PathPieces &path, + const SourceManager& SM); std::shared_ptr<PathDiagnosticControlFlowPiece> generateDiagForSwitchOP( @@ -1978,8 +1979,6 @@ PathDiagnosticLocation::createBegin(D, SM), CalleeLC); } - if (!AddPathEdges && GenerateDiagnostics) - CompactPathDiagnostic(PD->getMutablePieces(), SM); // Finally, prune the diagnostic path of uninteresting stuff. if (!PD->path.empty()) { @@ -2013,6 +2012,10 @@ removeRedundantMsgs(PD->getMutablePieces()); removeEdgesToDefaultInitializers(PD->getMutablePieces()); } + + if (GenerateDiagnostics && Opts.shouldDisplayMacroExpansions()) + CompactMacroExpandedPieces(PD->getMutablePieces(), SM); + return PD; } @@ -2442,9 +2445,10 @@ return true; } -/// CompactPathDiagnostic - This function postprocesses a PathDiagnostic object -/// and collapses PathDiagosticPieces that are expanded by macros. -static void CompactPathDiagnostic(PathPieces &path, const SourceManager& SM) { +/// CompactMacroExpandedPieces - This function postprocesses a PathDiagnostic +/// object and collapses PathDiagosticPieces that are expanded by macros. +static void CompactMacroExpandedPieces(PathPieces &path, + const SourceManager& SM) { using MacroStackTy = std::vector< std::pair<std::shared_ptr<PathDiagnosticMacroPiece>, SourceLocation>>; @@ -2460,7 +2464,7 @@ // Recursively compact calls. if (auto *call = dyn_cast<PathDiagnosticCallPiece>(&*piece)) { - CompactPathDiagnostic(call->path, SM); + CompactMacroExpandedPieces(call->path, SM); } // Get the location of the PathDiagnosticPiece. Index: lib/StaticAnalyzer/Core/AnalyzerOptions.cpp =================================================================== --- lib/StaticAnalyzer/Core/AnalyzerOptions.cpp +++ lib/StaticAnalyzer/Core/AnalyzerOptions.cpp @@ -463,6 +463,13 @@ return DisplayNotesAsEvents.getValue(); } +bool AnalyzerOptions::shouldDisplayMacroExpansions() { + if (!DisplayMacroExpansions.hasValue()) + DisplayMacroExpansions = + getBooleanOption("expand-macros", /*Default=*/false); + return DisplayMacroExpansions.getValue(); +} + bool AnalyzerOptions::shouldAggressivelySimplifyBinaryOperation() { if (!AggressiveBinaryOperationSimplification.hasValue()) AggressiveBinaryOperationSimplification = Index: include/clang/StaticAnalyzer/Core/AnalyzerOptions.h =================================================================== --- include/clang/StaticAnalyzer/Core/AnalyzerOptions.h +++ include/clang/StaticAnalyzer/Core/AnalyzerOptions.h @@ -308,6 +308,9 @@ /// \sa shouldDisplayNotesAsEvents Optional<bool> DisplayNotesAsEvents; + /// \sa shouldDisplayMacroExpansions + Optional<bool> DisplayMacroExpansions; + /// \sa shouldAggressivelySimplifyBinaryOperation Optional<bool> AggressiveBinaryOperationSimplification; @@ -683,6 +686,13 @@ /// to false when unset. bool shouldDisplayNotesAsEvents(); + /// Returns with true if macros related to the bugpath should be expanded and + /// included in the plist output. + /// + /// This is controlled by the 'expand-macros' option, which defaults to false + /// when unset. + bool shouldDisplayMacroExpansions(); + /// Returns true if SValBuilder should rearrange comparisons and additive /// operations of symbolic expressions which consist of a sum of a symbol and /// a concrete integer into the format where symbols are on the left-hand
_______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits