[PATCH] D49792: [ASTmporter] SourceRange-free function parameter checking for declarations
gerazo created this revision. gerazo added reviewers: a.sidorin, r.stahl. Herald added a subscriber: cfe-commits. The previous code which avoided infinite recursion (because of reparsing declarations in function parameter lists) contained SourceRange dependent code which had some problems when parameter types were coming from macros. The new solution is not using macros and therefore much safer. A couple of importer problems are fixed in redis and tmux by this fix. Various unittests are included. Repository: rC Clang https://reviews.llvm.org/D49792 Files: lib/AST/ASTImporter.cpp unittests/AST/ASTImporterTest.cpp Index: unittests/AST/ASTImporterTest.cpp === --- unittests/AST/ASTImporterTest.cpp +++ unittests/AST/ASTImporterTest.cpp @@ -989,7 +989,7 @@ " return 0;" "}", Lang_C, "input.c"); - auto FromVar = + auto *FromVar = FirstDeclMatcher().match(FromTU, varDecl(hasName("d"))); ASSERT_TRUE(FromVar); auto ToType = @@ -999,12 +999,41 @@ TEST_P(ASTImporterTestBase, ImportRecordDeclInFuncParams) { // This construct is not supported by ASTImporter. - Decl *FromTU = - getTuDecl("int declToImport(struct data_t{int a;int b;} *d){ return 0; }", -Lang_C, "input.c"); - auto From = FirstDeclMatcher().match(FromTU, functionDecl()); + Decl *FromTU = getTuDecl( + "int declToImport(struct data_t{int a;int b;} ***d){ return 0; }", + Lang_C, "input.c"); + auto *From = FirstDeclMatcher().match( + FromTU, functionDecl(hasName("declToImport"))); + ASSERT_TRUE(From); + auto *To = Import(From, Lang_C); + EXPECT_EQ(To, nullptr); +} + +TEST_P(ASTImporterTestBase, ImportRecordDeclInFuncFromMacro) { + Decl *FromTU = getTuDecl( + "#define NONAME_SIZEOF(type) sizeof(struct{type *dummy;}) \n" + "int declToImport(){ return NONAME_SIZEOF(int); }", + Lang_C, "input.c"); + auto *From = FirstDeclMatcher().match( + FromTU, functionDecl(hasName("declToImport"))); + ASSERT_TRUE(From); + auto *To = Import(From, Lang_C); + ASSERT_TRUE(To); + EXPECT_TRUE(MatchVerifier().match( + To, functionDecl(hasName("declToImport"), + hasDescendant(unaryExprOrTypeTraitExpr(); +} + +TEST_P(ASTImporterTestBase, ImportRecordDeclInFuncParamsFromMacro) { + // This construct is not supported by ASTImporter. + Decl *FromTU = getTuDecl( + "#define PAIR_STRUCT(type) struct data_t{type a;type b;} \n" + "int declToImport(PAIR_STRUCT(int) ***d){ return 0; }", + Lang_C, "input.c"); + auto *From = FirstDeclMatcher().match( + FromTU, functionDecl(hasName("declToImport"))); ASSERT_TRUE(From); - auto To = Import(From, Lang_C); + auto *To = Import(From, Lang_C); EXPECT_EQ(To, nullptr); } Index: lib/AST/ASTImporter.cpp === --- lib/AST/ASTImporter.cpp +++ lib/AST/ASTImporter.cpp @@ -1146,15 +1146,21 @@ FunctionDecl *FunDecl; if (isa(D) && (FunDecl = dyn_cast(OrigDC)) && FunDecl->hasBody()) { -SourceRange RecR = D->getSourceRange(); -SourceRange BodyR = FunDecl->getBody()->getSourceRange(); -// If RecordDecl is not in Body (it is a param), we bail out. -if (RecR.isValid() && BodyR.isValid() && -(RecR.getBegin() < BodyR.getBegin() || - BodyR.getEnd() < RecR.getEnd())) { - Importer.FromDiag(D->getLocation(), diag::err_unsupported_ast_node) - << D->getDeclKindName(); - return true; +auto getLeafPointeeType = [](const Type *T) { + while (T->isPointerType() || T->isArrayType()) { +T = T->getPointeeOrArrayElementType(); + } + return T; +}; +for (const ParmVarDecl *P : FunDecl->parameters()) { + const Type *LeafT = + getLeafPointeeType(P->getType().getCanonicalType().getTypePtr()); + auto *RT = dyn_cast(LeafT); + if (RT && RT->getDecl() == D) { +Importer.FromDiag(D->getLocation(), diag::err_unsupported_ast_node) +<< D->getDeclKindName(); +return true; + } } } Index: unittests/AST/ASTImporterTest.cpp === --- unittests/AST/ASTImporterTest.cpp +++ unittests/AST/ASTImporterTest.cpp @@ -989,7 +989,7 @@ " return 0;" "}", Lang_C, "input.c"); - auto FromVar = + auto *FromVar = FirstDeclMatcher().match(FromTU, varDecl(hasName("d"))); ASSERT_TRUE(FromVar); auto ToType = @@ -999,12 +999,41 @@ TEST_P(ASTImporterTestBase, ImportRecordDeclInFuncParams) { // This construct is not supported by ASTImporter. - Decl *FromTU = - getTuDecl("int declToImport(struct data_t{int a;int b;} *d){ return 0; }", -Lang_C, "input.c"); - auto From = FirstDeclMatcher().match(FromTU, fun
[PATCH] D30691: [analyzer] Support for naive cross translational unit analysis
gerazo added inline comments. Comment at: tools/scan-build-py/libscanbuild/analyze.py:702 + # To have good results from static analyzer certain compiler options shall be george.karpenkov wrote: > This blank line should not be in this PR. Scheduled to be done. Comment at: tools/scan-build-py/libscanbuild/report.py:270 +# Protect parsers from bogus report files coming from clang crashes +for filename in glob.iglob(os.path.join(output_dir, pattern)): george.karpenkov wrote: > I understand the intent here, but it seems it should be handled at a > different level: would it be hard to change Clang to only write the report > file at the very end, when no crash should be encountered? Or make parsers > not choke on empty fields? After careful investigation, I have to say, it would be very hard to tell that we've done this right in the current setup. Unlike the the rest of clang, ASTImporter.cpp is not a strong part of the system. It has a lot of ongoing fixes and probably more problems (missing CXX functionality) on the way. This is the part which makes CTU less reliable than clang generally. In order to elegantly survive a problem of this unit, we need to leave the other things intact and fix ASTImporter instead (this is an ongoing effort). Until then, this fix seems good enough. https://reviews.llvm.org/D30691 ___ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[PATCH] D27753: [analyzer] alpha.security.DirtyScalar Checker
gerazo added a comment. Before abandoning this patch and rewriting it, I would like to get a thumbs up for my plans: I will reimplement all functionality included here but without creating a new checker. Some parts which relate to specific checkers will be put into the corresponding checkers (like ArrayBoundCheckerV2). General ideas on taintedness (cleansing rules and usage warnings on standard types) will be put into GenericTaintChecker. We will see how it goes, will we have a smaller patch or not. WDYT? https://reviews.llvm.org/D27753 ___ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[PATCH] D30691: [analyzer] Support for naive cross translational unit analysis
gerazo added a comment. In https://reviews.llvm.org/D30691#954740, @george.karpenkov wrote: > I've tried using the patch, and I got blocked at the following: CTU options > are only exposed when one goes through `analyze-build` frontend, which > requires `compile_commands.json` to be present. I've used `libear` to > generate `compile_commands.json`, but the generated JSON does not contain the > `command` field, which causes `@require` before `run` to die (also, due to > the passing style this error was unnecessarily difficult to debug). > So could you write a short documentation somewhere how all pieces fit > together? What entry point should be used, what should people do who don't > have a build system-generated `compile_commands.json`etc. etc. Basically this patch does not change the way scan-build-py is working. So you can either create a compile_commands.json using intercept-build and than use analyze-build to use the result. Or you can do the two together using scan-build. We only offer the CTU functionality in analyze-build, because CTU needs multiple runs anyway, so on the spot build action capture and analyze is not possible with CTU. The general usage of scan-build-py is written in clang/tools/scan-build-py/README.md https://reviews.llvm.org/D30691 ___ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[PATCH] D47946: [ASTmporter] Fix infinite recursion on function import with struct definition in parameters
gerazo created this revision. gerazo added reviewers: a.sidorin, r.stahl. Herald added a subscriber: cfe-commits. Importing a function having a struct definition in the parameter list causes a crash in the importer via infinite recursion. This patch avoids the crash and reports such functions as not supported. Unit tests make sure that normal struct definitions inside function bodies work normally on the other hand and LLDB-like type imports also do. Repository: rC Clang https://reviews.llvm.org/D47946 Files: lib/AST/ASTImporter.cpp unittests/AST/ASTImporterTest.cpp Index: unittests/AST/ASTImporterTest.cpp === --- unittests/AST/ASTImporterTest.cpp +++ unittests/AST/ASTImporterTest.cpp @@ -168,11 +168,52 @@ std::string FileName; std::unique_ptr Unit; TranslationUnitDecl *TUDecl = nullptr; +std::unique_ptr Importer; + TU(StringRef Code, StringRef FileName, ArgVector Args) : Code(Code), FileName(FileName), Unit(tooling::buildASTFromCodeWithArgs(this->Code, Args, this->FileName)), - TUDecl(Unit->getASTContext().getTranslationUnitDecl()) {} + TUDecl(Unit->getASTContext().getTranslationUnitDecl()) { + BeginSourceFile(&*Unit); +} + +~TU() { EndSourceFile(&*Unit); } + +static void BeginSourceFile(ASTUnit *diagASTUnit) { + assert(diagASTUnit->getDiagnostics().getClient() && + diagASTUnit->getPreprocessorPtr() && + "Bad context for source file"); + diagASTUnit->getDiagnostics().getClient()->BeginSourceFile( + diagASTUnit->getASTContext().getLangOpts(), + &diagASTUnit->getPreprocessor()); +} + +static void EndSourceFile(ASTUnit *diagASTUnit) { + if (diagASTUnit->isMainFileAST() && + diagASTUnit->getDiagnostics().getClient()) { +diagASTUnit->getDiagnostics().getClient()->EndSourceFile(); + } +} + +void lazyInitImporter(ASTUnit *ToAST) { + assert(ToAST); + if (!Importer) { +Importer.reset(new ASTImporter( +ToAST->getASTContext(), ToAST->getFileManager(), +Unit->getASTContext(), Unit->getFileManager(), false)); + } +} + +Decl *import(ASTUnit *ToAST, Decl *FromDecl) { + lazyInitImporter(ToAST); + return Importer->Import(FromDecl); + } + +QualType import(ASTUnit *ToAST, QualType FromType) { + lazyInitImporter(ToAST); + return Importer->Import(FromType); +} }; // We may have several From contexts and related translation units. In each @@ -184,6 +225,28 @@ // vector is expanding, with the list we won't have these issues. std::list FromTUs; + void lazyInitToAST(Language ToLang) { +if (ToAST) + return; +ArgVector ToArgs = getArgVectorForLanguage(ToLang); +// Build the AST from an empty file. +ToAST = tooling::buildASTFromCodeWithArgs(/*Code=*/"", ToArgs, "empty.cc"); +TU::BeginSourceFile(&*ToAST); + } + + TU *findFromTU(Decl *From) { +// Create a virtual file in the To Ctx which corresponds to the file from +// which we want to import the `From` Decl. Without this source locations +// will be invalid in the ToCtx. +auto It = std::find_if(FromTUs.begin(), FromTUs.end(), [From](const TU &E) { + return E.TUDecl == From->getTranslationUnitDecl(); +}); +assert(It != FromTUs.end()); +assert(ToAST); +createVirtualFileIfNeeded(ToAST.get(), It->FileName, It->Code); +return &*It; + } + public: // We may have several From context but only one To context. std::unique_ptr ToAST; @@ -214,15 +277,12 @@ ToCode = ToSrcCode; assert(!ToAST); ToAST = tooling::buildASTFromCodeWithArgs(ToCode, ToArgs, OutputFileName); +TU::BeginSourceFile(&*ToAST); -ASTContext &FromCtx = FromTU.Unit->getASTContext(), - &ToCtx = ToAST->getASTContext(); +ASTContext &FromCtx = FromTU.Unit->getASTContext(); createVirtualFileIfNeeded(ToAST.get(), InputFileName, FromTU.Code); -ASTImporter Importer(ToCtx, ToAST->getFileManager(), FromCtx, - FromTU.Unit->getFileManager(), false); - IdentifierInfo *ImportedII = &FromCtx.Idents.get(Identifier); assert(ImportedII && "Declaration with the given identifier " "should be specified in test!"); @@ -233,7 +293,7 @@ assert(FoundDecls.size() == 1); -Decl *Imported = Importer.Import(FoundDecls.front()); +Decl *Imported = FromTU.import(ToAST.get(), FoundDecls.front()); assert(Imported); return std::make_tuple(*FoundDecls.begin(), Imported); } @@ -269,29 +329,17 @@ // May be called several times in a given test. // The different instances of the param From may have different ASTContext. Decl *Import(Decl *From, Language ToLang) { -if (!ToAST) { - ArgVector ToArgs = getArgVectorForLanguage(ToLang); -
[PATCH] D47946: [ASTmporter] Fix infinite recursion on function import with struct definition in parameters
gerazo added a comment. In https://reviews.llvm.org/D47946#1127679, @balazske wrote: > Problem: This change interferes with https://reviews.llvm.org/D47445. > Probably that should be committed, it is approved already. Ok. I'll wait for the other thing to be committed and I will rework this immediately. Thanks for telling. Repository: rC Clang https://reviews.llvm.org/D47946 ___ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[PATCH] D47946: [ASTmporter] Fix infinite recursion on function import with struct definition in parameters
gerazo updated this revision to Diff 151533. gerazo added a comment. Updated to not conflict with and use the stuff implemented in https://reviews.llvm.org/D47445 (so became a bit smaller) Now, it is ready for a review. Enjoy! https://reviews.llvm.org/D47946 Files: lib/AST/ASTImporter.cpp unittests/AST/ASTImporterTest.cpp Index: unittests/AST/ASTImporterTest.cpp === --- unittests/AST/ASTImporterTest.cpp +++ unittests/AST/ASTImporterTest.cpp @@ -171,13 +171,34 @@ std::string FileName; std::unique_ptr Unit; TranslationUnitDecl *TUDecl = nullptr; +std::unique_ptr Importer; + TU(StringRef Code, StringRef FileName, ArgVector Args) : Code(Code), FileName(FileName), Unit(tooling::buildASTFromCodeWithArgs(this->Code, Args, this->FileName)), TUDecl(Unit->getASTContext().getTranslationUnitDecl()) { Unit->enableSourceFileDiagnostics(); } + +void lazyInitImporter(ASTUnit *ToAST) { + assert(ToAST); + if (!Importer) { +Importer.reset(new ASTImporter( +ToAST->getASTContext(), ToAST->getFileManager(), +Unit->getASTContext(), Unit->getFileManager(), false)); + } +} + +Decl *import(ASTUnit *ToAST, Decl *FromDecl) { + lazyInitImporter(ToAST); + return Importer->Import(FromDecl); + } + +QualType import(ASTUnit *ToAST, QualType FromType) { + lazyInitImporter(ToAST); + return Importer->Import(FromType); +} }; // We may have several From contexts and related translation units. In each @@ -189,6 +210,28 @@ // vector is expanding, with the list we won't have these issues. std::list FromTUs; + void lazyInitToAST(Language ToLang) { +if (ToAST) + return; +ArgVector ToArgs = getArgVectorForLanguage(ToLang); +// Build the AST from an empty file. +ToAST = tooling::buildASTFromCodeWithArgs(/*Code=*/"", ToArgs, "empty.cc"); +ToAST->enableSourceFileDiagnostics(); + } + + TU *findFromTU(Decl *From) { +// Create a virtual file in the To Ctx which corresponds to the file from +// which we want to import the `From` Decl. Without this source locations +// will be invalid in the ToCtx. +auto It = std::find_if(FromTUs.begin(), FromTUs.end(), [From](const TU &E) { + return E.TUDecl == From->getTranslationUnitDecl(); +}); +assert(It != FromTUs.end()); +assert(ToAST); +createVirtualFileIfNeeded(ToAST.get(), It->FileName, It->Code); +return &*It; + } + public: // We may have several From context but only one To context. std::unique_ptr ToAST; @@ -221,14 +264,10 @@ ToAST = tooling::buildASTFromCodeWithArgs(ToCode, ToArgs, OutputFileName); ToAST->enableSourceFileDiagnostics(); -ASTContext &FromCtx = FromTU.Unit->getASTContext(), - &ToCtx = ToAST->getASTContext(); +ASTContext &FromCtx = FromTU.Unit->getASTContext(); createVirtualFileIfNeeded(ToAST.get(), InputFileName, FromTU.Code); -ASTImporter Importer(ToCtx, ToAST->getFileManager(), FromCtx, - FromTU.Unit->getFileManager(), false); - IdentifierInfo *ImportedII = &FromCtx.Idents.get(Identifier); assert(ImportedII && "Declaration with the given identifier " "should be specified in test!"); @@ -239,7 +278,7 @@ assert(FoundDecls.size() == 1); -Decl *Imported = Importer.Import(FoundDecls.front()); +Decl *Imported = FromTU.import(ToAST.get(), FoundDecls.front()); assert(Imported); return std::make_tuple(*FoundDecls.begin(), Imported); } @@ -276,30 +315,17 @@ // May be called several times in a given test. // The different instances of the param From may have different ASTContext. Decl *Import(Decl *From, Language ToLang) { -if (!ToAST) { - ArgVector ToArgs = getArgVectorForLanguage(ToLang); - // Build the AST from an empty file. - ToAST = - tooling::buildASTFromCodeWithArgs(/*Code=*/"", ToArgs, "empty.cc"); - ToAST->enableSourceFileDiagnostics(); -} - -// Create a virtual file in the To Ctx which corresponds to the file from -// which we want to import the `From` Decl. Without this source locations -// will be invalid in the ToCtx. -auto It = std::find_if(FromTUs.begin(), FromTUs.end(), [From](const TU &E) { - return E.TUDecl == From->getTranslationUnitDecl(); -}); -assert(It != FromTUs.end()); -createVirtualFileIfNeeded(ToAST.get(), It->FileName, It->Code); - -ASTContext &FromCtx = From->getASTContext(), - &ToCtx = ToAST->getASTContext(); -ASTImporter Importer(ToCtx, ToAST->getFileManager(), FromCtx, - FromCtx.getSourceManager().getFileManager(), false); -return Importer.Import(From); +lazyInitToAST(ToLang); +TU *FromTU = findFromTU(From); +return FromTU->import(To
[PATCH] D47946: [ASTmporter] Fix infinite recursion on function import with struct definition in parameters
gerazo updated this revision to Diff 152055. gerazo added a comment. Added @martong 's suggestions. https://reviews.llvm.org/D47946 Files: lib/AST/ASTImporter.cpp unittests/AST/ASTImporterTest.cpp Index: unittests/AST/ASTImporterTest.cpp === --- unittests/AST/ASTImporterTest.cpp +++ unittests/AST/ASTImporterTest.cpp @@ -165,19 +165,43 @@ // Buffer for the To context, must live in the test scope. std::string ToCode; + // Represents a "From" translation unit and holds an importer object which we + // use to import from this translation unit. struct TU { // Buffer for the context, must live in the test scope. std::string Code; std::string FileName; std::unique_ptr Unit; TranslationUnitDecl *TUDecl = nullptr; +std::unique_ptr Importer; + TU(StringRef Code, StringRef FileName, ArgVector Args) : Code(Code), FileName(FileName), Unit(tooling::buildASTFromCodeWithArgs(this->Code, Args, this->FileName)), TUDecl(Unit->getASTContext().getTranslationUnitDecl()) { Unit->enableSourceFileDiagnostics(); } + +void lazyInitImporter(ASTUnit *ToAST) { + assert(ToAST); + if (!Importer) { +Importer.reset(new ASTImporter( +ToAST->getASTContext(), ToAST->getFileManager(), +Unit->getASTContext(), Unit->getFileManager(), false)); + } + assert(&ToAST->getASTContext() == &Importer->getToContext()); +} + +Decl *import(ASTUnit *ToAST, Decl *FromDecl) { + lazyInitImporter(ToAST); + return Importer->Import(FromDecl); + } + +QualType import(ASTUnit *ToAST, QualType FromType) { + lazyInitImporter(ToAST); + return Importer->Import(FromType); +} }; // We may have several From contexts and related translation units. In each @@ -189,6 +213,28 @@ // vector is expanding, with the list we won't have these issues. std::list FromTUs; + void lazyInitToAST(Language ToLang) { +if (ToAST) + return; +ArgVector ToArgs = getArgVectorForLanguage(ToLang); +// Build the AST from an empty file. +ToAST = tooling::buildASTFromCodeWithArgs(/*Code=*/"", ToArgs, "empty.cc"); +ToAST->enableSourceFileDiagnostics(); + } + + TU *findFromTU(Decl *From) { +// Create a virtual file in the To Ctx which corresponds to the file from +// which we want to import the `From` Decl. Without this source locations +// will be invalid in the ToCtx. +auto It = std::find_if(FromTUs.begin(), FromTUs.end(), [From](const TU &E) { + return E.TUDecl == From->getTranslationUnitDecl(); +}); +assert(It != FromTUs.end()); +assert(ToAST); +createVirtualFileIfNeeded(ToAST.get(), It->FileName, It->Code); +return &*It; + } + public: // We may have several From context but only one To context. std::unique_ptr ToAST; @@ -221,14 +267,10 @@ ToAST = tooling::buildASTFromCodeWithArgs(ToCode, ToArgs, OutputFileName); ToAST->enableSourceFileDiagnostics(); -ASTContext &FromCtx = FromTU.Unit->getASTContext(), - &ToCtx = ToAST->getASTContext(); +ASTContext &FromCtx = FromTU.Unit->getASTContext(); createVirtualFileIfNeeded(ToAST.get(), InputFileName, FromTU.Code); -ASTImporter Importer(ToCtx, ToAST->getFileManager(), FromCtx, - FromTU.Unit->getFileManager(), false); - IdentifierInfo *ImportedII = &FromCtx.Idents.get(Identifier); assert(ImportedII && "Declaration with the given identifier " "should be specified in test!"); @@ -239,7 +281,7 @@ assert(FoundDecls.size() == 1); -Decl *Imported = Importer.Import(FoundDecls.front()); +Decl *Imported = FromTU.import(ToAST.get(), FoundDecls.front()); assert(Imported); return std::make_tuple(*FoundDecls.begin(), Imported); } @@ -276,30 +318,17 @@ // May be called several times in a given test. // The different instances of the param From may have different ASTContext. Decl *Import(Decl *From, Language ToLang) { -if (!ToAST) { - ArgVector ToArgs = getArgVectorForLanguage(ToLang); - // Build the AST from an empty file. - ToAST = - tooling::buildASTFromCodeWithArgs(/*Code=*/"", ToArgs, "empty.cc"); - ToAST->enableSourceFileDiagnostics(); -} - -// Create a virtual file in the To Ctx which corresponds to the file from -// which we want to import the `From` Decl. Without this source locations -// will be invalid in the ToCtx. -auto It = std::find_if(FromTUs.begin(), FromTUs.end(), [From](const TU &E) { - return E.TUDecl == From->getTranslationUnitDecl(); -}); -assert(It != FromTUs.end()); -createVirtualFileIfNeeded(ToAST.get(), It->FileName, It->Code); - -ASTContext &FromCtx = From->getASTContext(), - &ToCtx = ToAST->getASTContext(); -ASTImporter I
[PATCH] D54918: Apply clang-format to GenericTaintChecker.cpp
gerazo accepted this revision. gerazo added a comment. This revision is now accepted and ready to land. Looks correct clang-format to me. Repository: rC Clang CHANGES SINCE LAST ACTION https://reviews.llvm.org/D54918/new/ https://reviews.llvm.org/D54918 ___ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[PATCH] D47946: [ASTmporter] Fix infinite recursion on function import with struct definition in parameters
gerazo added a comment. @a.sidorin what do you think? https://reviews.llvm.org/D47946 ___ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[PATCH] D47946: [ASTImporter] Fix infinite recursion on function import with struct definition in parameters
gerazo marked 2 inline comments as done. gerazo added inline comments. Comment at: unittests/AST/ASTImporterTest.cpp:234 +assert(ToAST); +createVirtualFileIfNeeded(ToAST.get(), It->FileName, It->Code); +return &*It; a.sidorin wrote: > Can we move the side effect into import()? Sure, thank you. https://reviews.llvm.org/D47946 ___ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[PATCH] D47946: [ASTImporter] Fix infinite recursion on function import with struct definition in parameters
gerazo updated this revision to Diff 154991. gerazo marked an inline comment as done. gerazo added a comment. Minor fixes for Aleksei's comments. https://reviews.llvm.org/D47946 Files: lib/AST/ASTImporter.cpp unittests/AST/ASTImporterTest.cpp Index: unittests/AST/ASTImporterTest.cpp === --- unittests/AST/ASTImporterTest.cpp +++ unittests/AST/ASTImporterTest.cpp @@ -284,19 +284,44 @@ // Buffer for the To context, must live in the test scope. std::string ToCode; + // Represents a "From" translation unit and holds an importer object which we + // use to import from this translation unit. struct TU { // Buffer for the context, must live in the test scope. std::string Code; std::string FileName; std::unique_ptr Unit; TranslationUnitDecl *TUDecl = nullptr; +std::unique_ptr Importer; + TU(StringRef Code, StringRef FileName, ArgVector Args) : Code(Code), FileName(FileName), Unit(tooling::buildASTFromCodeWithArgs(this->Code, Args, this->FileName)), TUDecl(Unit->getASTContext().getTranslationUnitDecl()) { Unit->enableSourceFileDiagnostics(); } + +void lazyInitImporter(ASTUnit *ToAST) { + assert(ToAST); + if (!Importer) { +Importer.reset(new ASTImporter( +ToAST->getASTContext(), ToAST->getFileManager(), +Unit->getASTContext(), Unit->getFileManager(), false)); + } + assert(&ToAST->getASTContext() == &Importer->getToContext()); + createVirtualFileIfNeeded(ToAST, FileName, Code); +} + +Decl *import(ASTUnit *ToAST, Decl *FromDecl) { + lazyInitImporter(ToAST); + return Importer->Import(FromDecl); + } + +QualType import(ASTUnit *ToAST, QualType FromType) { + lazyInitImporter(ToAST); + return Importer->Import(FromType); +} }; // We may have several From contexts and related translation units. In each @@ -308,6 +333,26 @@ // vector is expanding, with the list we won't have these issues. std::list FromTUs; + void lazyInitToAST(Language ToLang) { +if (ToAST) + return; +ArgVector ToArgs = getArgVectorForLanguage(ToLang); +// Build the AST from an empty file. +ToAST = tooling::buildASTFromCodeWithArgs(/*Code=*/"", ToArgs, "empty.cc"); +ToAST->enableSourceFileDiagnostics(); + } + + TU *findFromTU(Decl *From) { +// Create a virtual file in the To Ctx which corresponds to the file from +// which we want to import the `From` Decl. Without this source locations +// will be invalid in the ToCtx. +auto It = std::find_if(FromTUs.begin(), FromTUs.end(), [From](const TU &E) { + return E.TUDecl == From->getTranslationUnitDecl(); +}); +assert(It != FromTUs.end()); +return &*It; + } + public: // We may have several From context but only one To context. std::unique_ptr ToAST; @@ -329,14 +374,10 @@ ToAST = tooling::buildASTFromCodeWithArgs(ToCode, ToArgs, OutputFileName); ToAST->enableSourceFileDiagnostics(); -ASTContext &FromCtx = FromTU.Unit->getASTContext(), - &ToCtx = ToAST->getASTContext(); +ASTContext &FromCtx = FromTU.Unit->getASTContext(); createVirtualFileIfNeeded(ToAST.get(), InputFileName, FromTU.Code); -ASTImporter Importer(ToCtx, ToAST->getFileManager(), FromCtx, - FromTU.Unit->getFileManager(), false); - IdentifierInfo *ImportedII = &FromCtx.Idents.get(Identifier); assert(ImportedII && "Declaration with the given identifier " "should be specified in test!"); @@ -347,7 +388,7 @@ assert(FoundDecls.size() == 1); -Decl *Imported = Importer.Import(FoundDecls.front()); +Decl *Imported = FromTU.import(ToAST.get(), FoundDecls.front()); assert(Imported); return std::make_tuple(*FoundDecls.begin(), Imported); } @@ -384,30 +425,17 @@ // May be called several times in a given test. // The different instances of the param From may have different ASTContext. Decl *Import(Decl *From, Language ToLang) { -if (!ToAST) { - ArgVector ToArgs = getArgVectorForLanguage(ToLang); - // Build the AST from an empty file. - ToAST = - tooling::buildASTFromCodeWithArgs(/*Code=*/"", ToArgs, "empty.cc"); - ToAST->enableSourceFileDiagnostics(); -} - -// Create a virtual file in the To Ctx which corresponds to the file from -// which we want to import the `From` Decl. Without this source locations -// will be invalid in the ToCtx. -auto It = std::find_if(FromTUs.begin(), FromTUs.end(), [From](const TU &E) { - return E.TUDecl == From->getTranslationUnitDecl(); -}); -assert(It != FromTUs.end()); -createVirtualFileIfNeeded(ToAST.get(), It->FileName, It->Code); - -ASTContext &FromCtx = From->getASTContext(), - &ToCtx = ToAST->getASTContext(); -
[PATCH] D47946: [ASTImporter] Fix infinite recursion on function import with struct definition in parameters
gerazo added a comment. @martong I don't have commit rights. Thanks for your help in advance. https://reviews.llvm.org/D47946 ___ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[PATCH] D30691: [analyzer] Support for naive cross translational unit analysis
gerazo added a comment. Thanks George for the review. I will start working on the code right away. I've tried to answer the simpler cases. Comment at: tools/scan-build-py/libscanbuild/analyze.py:44 +CTU_FUNCTION_MAP_FILENAME = 'externalFnMap.txt' +CTU_TEMP_FNMAP_FOLDER = 'tmpExternalFnMaps' george.karpenkov wrote: > What would happen when multiple analyzer runs are launched concurrently in > the same directory? Would they race on this file? Yes and no. The 1st, collect part of CTU creates this file by aggregating all data from the build system, while the 2nd part which does the analysis itself only reads it. Multiple analysis can use this file simultaneously without problem. However, multiple collect phases launched on a system does not make sense. In this case, the later one would write over the previous results with the same data. Comment at: tools/scan-build-py/libscanbuild/analyze.py:64 if need_analyzer(args.build): -run_analyzer_parallel(args) +run_analyzer_with_ctu(args) else: george.karpenkov wrote: > `run_analyzer_with_ctu` is an unfortunate function name, since it may or may > not launch CTU depending on the passed arguments. Could we just call it > `run_analyzer`? We have an other run_analyzer method but still, it is a good idead, I will make something better up. Comment at: tools/scan-build-py/libscanbuild/analyze.py:103 +def prefix_with(constant, pieces): +""" From a sequence create another sequence where every second element george.karpenkov wrote: > Actually I would prefer a separate NFC PR just moving this function. This PR > is already too complicated as it is. Yes. The only reason was for the move to make it testable. However, we need more tests as you wrote down below. Comment at: tools/scan-build-py/libscanbuild/analyze.py:120 + func_map_cmd=args.func_map_cmd) +if hasattr(args, 'ctu_phases') and hasattr(args.ctu_phases, 'dir') +else CtuConfig(collect=False, analyze=False, dir='', func_map_cmd='')) george.karpenkov wrote: > Extensive `hasattr` usage is often a codesmell, and it hinders understanding. > Firstly, shouldn't `args.ctu_phases.dir` be always available, judging from > the code in `arguments.py`? > Secondly, in what cases `args.ctu_phases` is not available when we are > already going down the CTU code path? Shouldn't we throw an error at that > point instead of creating a default configuration? > (default-configurations-instead-of-crashing is also another way to introduce > very subtle and hard-to-catch error modes) It definitely needs more comments, so thanks, I will put them here. There are two separate possibilities here for this code part: 1. The user does not want CTU at all. In this case, no CTU phases are switched on (collect and analyze), so no CTU code will run. This is why dir has no importance in this case. 2. CTU capabilities were not even detected, so the help and available arguments about CTU are also missing. In this case we create a dummy config telling that everything is switched off. The reason for using hasattr was that not having CTU capabilities is not an exceptional condition, rather a matter of configuration. I felt that handling this with an exception would be misleading. Comment at: tools/scan-build-py/libscanbuild/analyze.py:128 +A function map contains the id of a function (mangled name) and the +originating source (the corresponding AST file) name.""" + george.karpenkov wrote: > Could you also specify what `func_map_lines` is (file? list? of strings?) in > the docstring? There's specific Sphinx syntax for that. Otherwise it is hard > to follow. Thanks, I'll do it. Comment at: tools/scan-build-py/libscanbuild/analyze.py:144 +if len(ast_files) == 1: +mangled_ast_pairs.append((mangled_name, ast_files.pop())) + george.karpenkov wrote: > Firstly, no need to modify the set in order to get the first element, just > use `next(iter(ast_files))`. > Secondly, when exactly do conflicting names happen? Is it a bug? Shouldn't > the tool log such cases? Nice catch, thanks. For your second question: Unfortunately, it is not a bug, rather a misfeature which we can't handle yet. There can be tricky build-systems where a single file with different configurations is built multiple times, so the same function signature will be present in multiple link units. The other option is when two separate compile units have an exact same function signature, but they are never linked together, so it is not a problem in the build itself. In this case, the cross compilation unit functionality cannot tell exactly which compilation unit to turn to for the function, because there are multiple valid choices. In this ca
[PATCH] D30691: [analyzer] Support for naive cross translational unit analysis
gerazo added a comment. The code modifications are coming soon (after doing some extensive testing) for the scan-build part. Comment at: tools/scan-build-py/libscanbuild/analyze.py:223 +ctu_config = get_ctu_config(args) +if ctu_config.collect: +shutil.rmtree(ctu_config.dir, ignore_errors=True) danielmarjamaki wrote: > danielmarjamaki wrote: > > not a big deal but I would use early exits in this function > with "not a big deal" I mean; feel free to ignore my comment if you want to > have it this way. I've checked it through. The only place for an early exit now would be before the else. The 1st and 2nd ifs are in fact non-orthogonal. Comment at: tools/scan-build-py/libscanbuild/analyze.py:145 +mangled_ast_pairs.append((mangled_name, ast_files.pop())) + +return mangled_ast_pairs george.karpenkov wrote: > Overall, instead of creating a dictionary with multiple elements, and then > converting to a list, it's much simper to only add an element to > `mangled_to_asts` when it is not already mapped to something (probably > logging a message otherwise), and then just return `mangled_to_asts.items()` The reason for the previous is that we need to count the occurence number ofdifferent mappings only let those pass through which don't have multiple variations. Comment at: tools/scan-build-py/libscanbuild/analyze.py:189 +# Remove all temporary files +shutil.rmtree(fnmap_dir, ignore_errors=True) + gerazo wrote: > george.karpenkov wrote: > > Having an analysis tool remove files is scary, what if (maybe not in this > > revision, but in a future iteration) a bug is introduced, and the tool > > removes user code instead? > > Why not just create a temporary directory with `tempfile.mkdtemp`, put all > > temporary files there, and then simply iterate through them? > > Then you would be able to get rid of the constant `CPU_TEMP_FNMAP_FOLDER` > > entirely, and OS would be responsible for cleanup. > Yes, you are right. We are essentially using a temp dir. Because of the size > we first had to put it next to the project (not on tmp drive for instance) > and for debugging purposes we gave a name to it. Still it can be done with > mkdtemp as well. Finally, I came to the conclusion that mkdtemp would not be better than the current solution. In order to find our created dir by other threads, we need a designated name. Suffixing it by generated name would further complicate things as we need not to allow multiple concurrent runs here. The current solution is more robust from this point of view. Comment at: tools/scan-build-py/libscanbuild/analyze.py:241 +run_analyzer_parallel(args) +shutil.rmtree(ctu_config.dir, ignore_errors=True) +else: george.karpenkov wrote: > Same as the comment above about removing folders. Also it seems like there > should be a way to remove redundancy in `if collect / remove tree` block > repeated twice. Th previous call for data removal happens because the user asked for a collect run, so we clean data to do a recollection. This second one happens because the user asked for a full recollection and anaylsis run all in one. So here we destroy the temp data for user's convenience. This happens after, not before like previously. The default behavior is to do this when the user uses the tool the easy way (collect and analyze all in one) and we intentionally keep collection data if the user only asks for a collect or an analyze run. So with this, the user can use a collect run's results for multiple analyze runs. This is written in the command line help. I will definitely put comments here to explain. Comment at: tools/scan-build-py/libscanbuild/analyze.py:296 +'command': [execution.cmd[0], '-c'] + compilation.flags, +'ctu': json.loads(os.getenv('ANALYZE_BUILD_CTU')) } george.karpenkov wrote: > Again, is it possible to avoid JSON-over-environment-variables? There is an other thing against changing this. Currently the interface here using env variables is used by intercept-build, analyze-build and scan-build tool as well. In order to drop json, we need to change those tools too. It would be a separate patch definitely. Comment at: tools/scan-build-py/libscanbuild/analyze.py:561 +except OSError: +pass +ast_command = [opts['clang'], '-emit-ast'] gerazo wrote: > george.karpenkov wrote: > > `try/except/pass` is almost always bad. > > When can the error occur? Why are we ignoring it? > I think this code is redundant with the if above. Here the folders are created on demand. Because these are created parallel by multiple processes, there is small chance that an other process already created the folder between the isdir check and the makedirs c
[PATCH] D30691: [analyzer] Support for naive cross translational unit analysis
gerazo added inline comments. Comment at: tools/scan-build-py/libscanbuild/analyze.py:44 +CTU_FUNCTION_MAP_FILENAME = 'externalFnMap.txt' +CTU_TEMP_FNMAP_FOLDER = 'tmpExternalFnMaps' george.karpenkov wrote: > gerazo wrote: > > george.karpenkov wrote: > > > What would happen when multiple analyzer runs are launched concurrently > > > in the same directory? Would they race on this file? > > Yes and no. The 1st, collect part of CTU creates this file by aggregating > > all data from the build system, while the 2nd part which does the analysis > > itself only reads it. Multiple analysis can use this file simultaneously > > without problem. However, multiple collect phases launched on a system does > > not make sense. In this case, the later one would write over the previous > > results with the same data. > > However, multiple collect phases launched on a system does not make sense > > Why not? What about a system with multiple users, where code is in the shared > directory? Or even simpler: I've launched a background process, forgot about > it, and then launched it again? > > > In this case, the later one would write over the previous results with the > > same data. > > That is probably fine, I am more worried about racing, where process B would > be reading a partially overriden file (not completely sure whether it is > possible) > I see your point. In order to create the multiple user scenario you've mentioned, those users need to give the exact same output folders for their jobs. Our original bet was that our users are not doing this as the scan-build tool itself is also cannot be used this way. Still the process left there is something to consider. I will plan some kind of locking mechanism to avoid situations like this. Comment at: tools/scan-build-py/libscanbuild/analyze.py:609 +# Recover namedtuple from json when coming from analyze_cc +if not hasattr(ctu_config, 'collect'): +ctu_config = CtuConfig(collect=ctu_config[0], george.karpenkov wrote: > gerazo wrote: > > george.karpenkov wrote: > > > In which case is this branch hit? Isn't improperly formed input argument > > > indicative of an internal error at this stage? > > An other part of scan-build-py, analyze_cc uses namedtuple to json format > > to communicate. However, the names are not coming back from json, so this > > code helps in this. This is the case when someone uses the whole toolset > > with compiler wrapping. All the environment variable hassle is also > > happening because of this. So these env vars are not for user modification > > (as you've suggested earlier). > OK so `opts['ctu']` is a tuple or a named tuple depending on how this > function is entered? BTW could you point me to the `analyze_cc` entry point? > > For the purpose of having more uniform code with less cases to care about, do > you think we could just use ordinary tuples instead of constructing a named > one, since we have to deconstruct an ordinary tuple in any case? Using a NamedTuple improves readability of the code a lot with less comments. It is unfortunate that serializing it is not solved by Python. I think moving this code to the entry point would make the whole thing much nicer. The entry point is at analyze_compiler_wrapper Comment at: tools/scan-build-py/libscanbuild/arguments.py:376 +metavar='', +default='ctu-dir', +help="""Defines the temporary directory used between ctu george.karpenkov wrote: > BTW can we also explicitly add `dest='ctu_dir'` here, as otherwise I was > initially very confused as to where the variable is set. Yes, of course. https://reviews.llvm.org/D30691 ___ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[PATCH] D27753: [analyzer] alpha.security.DirtyScalar Checker
gerazo updated this revision to Diff 82336. https://reviews.llvm.org/D27753 Files: include/clang/StaticAnalyzer/Checkers/Checkers.td lib/StaticAnalyzer/Checkers/CMakeLists.txt lib/StaticAnalyzer/Checkers/DirtyScalarChecker.cpp lib/StaticAnalyzer/Checkers/GenericTaintChecker.cpp test/Analysis/dirty-scalar-unbounded.cpp Index: test/Analysis/dirty-scalar-unbounded.cpp === --- /dev/null +++ test/Analysis/dirty-scalar-unbounded.cpp @@ -0,0 +1,92 @@ +// RUN: %clang_cc1 -analyze -analyzer-checker=alpha.security.taint,alpha.security.DirtyScalar -verify -analyzer-config alpha.security.DirtyScalar:criticalOnly=true %s +// RUN: %clang_cc1 -analyze -analyzer-checker=alpha.security.taint,alpha.security.DirtyScalar -verify -analyzer-config alpha.security.DirtyScalar:criticalOnly=false -DDIRTYSCALARSTRICT=1 %s + +#include "Inputs/system-header-simulator.h" + +typedef long ssize_t; + +ssize_t recv(int s, void *buf, size_t len, int flags); + +void gets_tainted_ival(int val) { + (void)val; +} + +void gets_tainted_uval(unsigned int val) { + (void)val; +} + +int tainted_usage() { + int size; + scanf("%d", &size); + char *buff = new char[size]; // expected-warning{{Tainted variable is used without proper bound checking}} + for (int i = 0; i < size; ++i) { +#if DIRTYSCALARSTRICT +// expected-warning@-2{{Tainted variable is used without proper bound checking}} +#endif +scanf("%d", &buff[i]); + } + buff[size - 1] = 0; // expected-warning{{Tainted variable is used without proper bound checking}} + *(buff + size - 2) = 0; // expected-warning{{Tainted variable is used without proper bound checking}} + gets_tainted_ival(size); +#if DIRTYSCALARSTRICT +// expected-warning@-2{{Tainted variable is used without proper bound checking}} +#endif + + return 0; +} + +int tainted_usage_checked() { + int size; + scanf("%d", &size); + if (size < 0 || size > 255) +return -1; + char *buff = new char[size]; // no warning + for (int i = 0; i < size; ++i) { // no warning +scanf("%d", &buff[i]); // no warning + } + buff[size - 1] = 0; // no warning + *(buff + size - 2) = 0; // no warning + gets_tainted_ival(size); // no warning + + unsigned int idx; + scanf("%d", &idx); + if (idx > 255) +return -1; + gets_tainted_uval(idx); // no warning + + return 0; +} + +int detect_tainted(char const **messages) { + int sock, index; + scanf("%d", &sock); + if (recv(sock, &index, sizeof(index), 0) != sizeof(index)) { +#if DIRTYSCALARSTRICT +// expected-warning@-2{{Tainted variable is used without proper bound checking}} +#endif +return -1; + } + int index2 = index; + printf("%s\n", messages[index]); // expected-warning{{Tainted variable is used without proper bound checking}} + printf("%s\n", messages[index2]); // expected-warning{{Tainted variable is used without proper bound checking}} + + return 0; +} + +int skip_sizes_likely_used_for_table_access(char const **messages) { + int sock; + char byte; + + scanf("%d", &sock); + if (recv(sock, &byte, sizeof(byte), 0) != sizeof(byte)) { +#if DIRTYSCALARSTRICT +// expected-warning@-2{{Tainted variable is used without proper bound checking}} +#endif +return -1; + } + char byte2 = byte; + printf("%s\n", messages[byte]); // no warning + printf("%s\n", messages[byte2]); // no warning + + return 0; +} Index: lib/StaticAnalyzer/Checkers/GenericTaintChecker.cpp === --- lib/StaticAnalyzer/Checkers/GenericTaintChecker.cpp +++ lib/StaticAnalyzer/Checkers/GenericTaintChecker.cpp @@ -222,6 +222,7 @@ .Case("getline", TaintPropagationRule(2, 0)) .Case("getdelim", TaintPropagationRule(3, 0)) .Case("fgetln", TaintPropagationRule(0, ReturnValueIndex)) +.Case("recv", TaintPropagationRule(InvalidArgIndex, 1, true)) .Default(TaintPropagationRule()); if (!Rule.isNull()) Index: lib/StaticAnalyzer/Checkers/DirtyScalarChecker.cpp === --- /dev/null +++ lib/StaticAnalyzer/Checkers/DirtyScalarChecker.cpp @@ -0,0 +1,231 @@ +//===-- DirtyScalarChecker.cpp *- C++ -*--// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===--===// +// +// Reports the usage of dirty integers in code. +// A dirty value is tainted and wasn't bound checked properly by the programmer. +// By default (criticalOnly == true) reports dirty usage in +// - memcpy, malloc, calloc, strcpy, strncpy, memmove functions +// - array indexing +// - memory allocation with new +// - pointer arithmetic +// otherwise (criticalOnly == false) it also reports usage as +// - function argument +// - loop condition +// +//===--
[PATCH] D27753: [analyzer] alpha.security.DirtyScalar Checker
gerazo marked 6 inline comments as done. gerazo added a comment. Thank you very much for your help. I've added all suggested modifications including tests covering all checker option settings. https://reviews.llvm.org/D27753 ___ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[PATCH] D27753: [analyzer] alpha.security.DirtyScalar Checker
gerazo marked an inline comment as done. gerazo added a comment. So thank you again for the valuable questions. In this checker, I give warnings for values which are both tainted and were also not checked by the programmer. So unlike GenericTaintChecker, I do implement the boundedness check here for certain, interesting constructs (which is controlled by the critical option). GenericTaintChecker focuses purely on taintedness, almost like a service for other checkers. I've added a new rule to it, improving the taintedness logic, but I felt mixing the bound checking logic into it would make the two ideas inseparable. I've also checked others using tainted values. ArrayBoundCheckerV2 also works with array indexing and modifies its behaviour on finding tainted values. However, the main difference is that ArrayBoundCheckerV2 uses region information to check bounds which may or may not be present, while this checker can give a warning whenever any information from the constraint manager does not prove that any bound checking were performed on the value before (and potentially works on many other constructs where region information shouldn't be there at all). Not having correct region information with tainted values is typical when reading data from unknown sources. This dirty approach is better in this regard. ArrayBoundCheckerV2 on the other hand can give warnings solely based on region information. Yes, it can happen that both will give a warning as well for the same construct. Do you think it is distracting? Should I remove the array indexing checks from this checker (still it gives warning for pointer arithmetic as well)? Comment at: test/Analysis/dirty-scalar-unbounded.cpp:1 +// RUN: %clang_cc1 -analyze -analyzer-checker=alpha.security.taint,alpha.security.DirtyScalar -verify -analyzer-config alpha.security.DirtyScalar:criticalOnly=false %s + a.sidorin wrote: > 1. It will be good to have tests for option set to true. > 2. Is there any test that makes usage of 'RecurseLimit` variable? Now both settings are covered. For RecurseLimit, I've added a named constant and better explanation. This is only a practical limit to not let the analysis dive too deep into complex loop expressions. The limit currently set should be adequate, so it would not make sense to set it programmatically. https://reviews.llvm.org/D27753 ___ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[PATCH] D30691: [analyzer] Support for naive cross translational unit analysis
gerazo added inline comments. Comment at: tools/xtu-analysis/xtu-analyze.py:29 + +threading_factor = int(multiprocessing.cpu_count() * 1.5) +analyser_output_formats = ['plist-multi-file', 'plist', 'plist-html', danielmarjamaki wrote: > does this mean that if there are 4 cores this script will by default use 6 > threads? isn't that too aggressive? Yes, it does mean that. You are right, it is aggressive. To be honest, the xtu-build step is more io intensive where it really makes sense. In the xtu-analyze step, it is marginal when big files are compiled (more cpu, less io). We will put this one back to 1.0 instead. https://reviews.llvm.org/D30691 ___ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[PATCH] D27753: [analyzer] alpha.security.DirtyScalar Checker
gerazo added a comment. Hi, did you have time to check my changes? https://reviews.llvm.org/D27753 ___ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[PATCH] D27753: [analyzer] alpha.security.DirtyScalar Checker
gerazo marked an inline comment as done. gerazo added inline comments. Comment at: lib/StaticAnalyzer/Checkers/DirtyScalarChecker.cpp:184 +Ty = Ctx.IntTy; + if (!Ty->isIntegerType() || Ctx.getIntWidth(Ty) <= TooNarrowForBoundCheck) +return false; a.sidorin wrote: > Does the second check means that we exclude boolean and char values? I cannot > find any reason to do it for chars. Yes, we exclude them. Using lookup tables especially in cryptography sometimes involve reading a value from disk and than using this value immediately with a table lookup. This way, you use a dirty value directly in array indexing. Reading a byte and using it on a prepared 256 element table is common. As the read value gets bigger it is less performant and hence less common to do it. I've found exactly 1 false positive in openssl without this exclusion. https://reviews.llvm.org/D27753 ___ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[PATCH] D27753: [analyzer] alpha.security.DirtyScalar Checker
gerazo marked an inline comment as done. gerazo added a comment. Hmm... I am thinking on this issue for a week now... I've played with the idea of implementing cleansing rules in GenericTaintChecker. It would be elegant but unfortunately, I have to think they are not general. Cleansing of a string (because it has no terminating null at the end) is very different from integral type cleansing. A signed value has to be lower bound checked as well, an unsigned only gets upper bound treatment. It also turns out that the type itself also can't give enough information about the needed cleansing rule. A number used for array indexing can be properly bound checked on the region extents while a simple number can only be checked in a way that any upper bound checking was done at all on it... All this leads to the need of several types of taintednesses (string taintedness, array taintedness, general bound check taintedness) because the cleansing can only take down one type of taintedness at a time. That would be the only way for a checker to be able to access that the taintedness type specific to the checker is still there or was already cleansed by the central logic. For me it shows that cleansing rules belong to specific checkers and cannot be efficiently generalized even in case of a single int type. About the ArrayBoundCheckerV2: I agree that no redundant checks should be done system-wide. I would also extend ArrayBoundCheckV2 or put array checking into a separate checker. Currently that checker and the one implemented here do not give the same results. ArrayBoundCheckerV2 knows more about the array but is not willing to give a warning without knowing region information for the array. The easiest way would be to use one checker's code from the other and find out if the other is active and would already give a warning anyway... but I understand that it is against current architectural policies. https://reviews.llvm.org/D27753 ___ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[PATCH] D27753: [analyzer] alpha.security.DirtyScalar Checker
gerazo added a comment. > Stepping back a bit, what do you consider "dirty" vs "clean"? It seems that > you are looking for prove that the values are known to be within the bounds > of min and max int values. What happens if there is a comparison to an > unknown symbolic value? Should that be considered as clean or tainted? Are > there test cases for this? I consider values as clean when they were checked by the programmer from both sides. However, my implementation purely works from constraints in effect (and using min and max is just the broadest constraint I could find). So you are totally right that comparison with unknown symbols is not covered nor in implementation, nor in tests. Can you suggest a universally working method which can handle all cases (e.g. complex expressions on both sides of the operator)? If we could find such an approach, that would be something which could really go into the GenericTaintChecker as an improvement. And I would gladly rewrite this whole stuff to fit the more general solution. https://reviews.llvm.org/D27753 ___ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[PATCH] D30831: [ASTImporter] Import fix of GCCAsmStmts w/ missing symbolic operands
gerazo created this revision. Do not drop the import of the whole function just because an asm statement in it has some missing symbolic names. https://reviews.llvm.org/D30831 Files: lib/AST/ASTImporter.cpp test/ASTMerge/asm/Inputs/asm-function.cpp test/ASTMerge/asm/test.cpp Index: test/ASTMerge/asm/test.cpp === --- test/ASTMerge/asm/test.cpp +++ test/ASTMerge/asm/test.cpp @@ -4,4 +4,5 @@ void testAsmImport() { asmFunc(12, 42); + asmFunc2(42); } Index: test/ASTMerge/asm/Inputs/asm-function.cpp === --- test/ASTMerge/asm/Inputs/asm-function.cpp +++ test/ASTMerge/asm/Inputs/asm-function.cpp @@ -9,3 +9,13 @@ res = bigres; return res; } + +int asmFunc2(int i) { + int res; + asm ("mov %1, %0 \t\n" + "inc %0 " + : "=r" (res) + : "r" (i) + : "cc"); + return res; +} Index: lib/AST/ASTImporter.cpp === --- lib/AST/ASTImporter.cpp +++ lib/AST/ASTImporter.cpp @@ -5218,14 +5218,14 @@ SmallVector Names; for (unsigned I = 0, E = S->getNumOutputs(); I != E; I++) { IdentifierInfo *ToII = Importer.Import(S->getOutputIdentifier(I)); -if (!ToII) - return nullptr; +// ToII is nullptr when no symbolic name is given for output operand +// see ParseStmtAsm::ParseAsmOperandsOpt Names.push_back(ToII); } for (unsigned I = 0, E = S->getNumInputs(); I != E; I++) { IdentifierInfo *ToII = Importer.Import(S->getInputIdentifier(I)); -if (!ToII) - return nullptr; +// ToII is nullptr when no symbolic name is given for input operand +// see ParseStmtAsm::ParseAsmOperandsOpt Names.push_back(ToII); } Index: test/ASTMerge/asm/test.cpp === --- test/ASTMerge/asm/test.cpp +++ test/ASTMerge/asm/test.cpp @@ -4,4 +4,5 @@ void testAsmImport() { asmFunc(12, 42); + asmFunc2(42); } Index: test/ASTMerge/asm/Inputs/asm-function.cpp === --- test/ASTMerge/asm/Inputs/asm-function.cpp +++ test/ASTMerge/asm/Inputs/asm-function.cpp @@ -9,3 +9,13 @@ res = bigres; return res; } + +int asmFunc2(int i) { + int res; + asm ("mov %1, %0 \t\n" + "inc %0 " + : "=r" (res) + : "r" (i) + : "cc"); + return res; +} Index: lib/AST/ASTImporter.cpp === --- lib/AST/ASTImporter.cpp +++ lib/AST/ASTImporter.cpp @@ -5218,14 +5218,14 @@ SmallVector Names; for (unsigned I = 0, E = S->getNumOutputs(); I != E; I++) { IdentifierInfo *ToII = Importer.Import(S->getOutputIdentifier(I)); -if (!ToII) - return nullptr; +// ToII is nullptr when no symbolic name is given for output operand +// see ParseStmtAsm::ParseAsmOperandsOpt Names.push_back(ToII); } for (unsigned I = 0, E = S->getNumInputs(); I != E; I++) { IdentifierInfo *ToII = Importer.Import(S->getInputIdentifier(I)); -if (!ToII) - return nullptr; +// ToII is nullptr when no symbolic name is given for input operand +// see ParseStmtAsm::ParseAsmOperandsOpt Names.push_back(ToII); } ___ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[PATCH] D30831: [ASTImporter] Import fix of GCCAsmStmts w/ missing symbolic operands
gerazo updated this revision to Diff 91541. gerazo added a comment. Better check not letting a real import problem passing through https://reviews.llvm.org/D30831 Files: lib/AST/ASTImporter.cpp test/ASTMerge/asm/Inputs/asm-function.cpp test/ASTMerge/asm/test.cpp Index: test/ASTMerge/asm/test.cpp === --- test/ASTMerge/asm/test.cpp +++ test/ASTMerge/asm/test.cpp @@ -4,4 +4,5 @@ void testAsmImport() { asmFunc(12, 42); + asmFunc2(42); } Index: test/ASTMerge/asm/Inputs/asm-function.cpp === --- test/ASTMerge/asm/Inputs/asm-function.cpp +++ test/ASTMerge/asm/Inputs/asm-function.cpp @@ -9,3 +9,13 @@ res = bigres; return res; } + +int asmFunc2(int i) { + int res; + asm ("mov %1, %0 \t\n" + "inc %0 " + : "=r" (res) + : "r" (i) + : "cc"); + return res; +} Index: lib/AST/ASTImporter.cpp === --- lib/AST/ASTImporter.cpp +++ lib/AST/ASTImporter.cpp @@ -5218,13 +5218,17 @@ SmallVector Names; for (unsigned I = 0, E = S->getNumOutputs(); I != E; I++) { IdentifierInfo *ToII = Importer.Import(S->getOutputIdentifier(I)); -if (!ToII) +// ToII is nullptr when no symbolic name is given for output operand +// see ParseStmtAsm::ParseAsmOperandsOpt +if (!ToII && S->getOutputIdentifier(I)) return nullptr; Names.push_back(ToII); } for (unsigned I = 0, E = S->getNumInputs(); I != E; I++) { IdentifierInfo *ToII = Importer.Import(S->getInputIdentifier(I)); -if (!ToII) +// ToII is nullptr when no symbolic name is given for input operand +// see ParseStmtAsm::ParseAsmOperandsOpt +if (!ToII && S->getInputIdentifier(I)) return nullptr; Names.push_back(ToII); } Index: test/ASTMerge/asm/test.cpp === --- test/ASTMerge/asm/test.cpp +++ test/ASTMerge/asm/test.cpp @@ -4,4 +4,5 @@ void testAsmImport() { asmFunc(12, 42); + asmFunc2(42); } Index: test/ASTMerge/asm/Inputs/asm-function.cpp === --- test/ASTMerge/asm/Inputs/asm-function.cpp +++ test/ASTMerge/asm/Inputs/asm-function.cpp @@ -9,3 +9,13 @@ res = bigres; return res; } + +int asmFunc2(int i) { + int res; + asm ("mov %1, %0 \t\n" + "inc %0 " + : "=r" (res) + : "r" (i) + : "cc"); + return res; +} Index: lib/AST/ASTImporter.cpp === --- lib/AST/ASTImporter.cpp +++ lib/AST/ASTImporter.cpp @@ -5218,13 +5218,17 @@ SmallVector Names; for (unsigned I = 0, E = S->getNumOutputs(); I != E; I++) { IdentifierInfo *ToII = Importer.Import(S->getOutputIdentifier(I)); -if (!ToII) +// ToII is nullptr when no symbolic name is given for output operand +// see ParseStmtAsm::ParseAsmOperandsOpt +if (!ToII && S->getOutputIdentifier(I)) return nullptr; Names.push_back(ToII); } for (unsigned I = 0, E = S->getNumInputs(); I != E; I++) { IdentifierInfo *ToII = Importer.Import(S->getInputIdentifier(I)); -if (!ToII) +// ToII is nullptr when no symbolic name is given for input operand +// see ParseStmtAsm::ParseAsmOperandsOpt +if (!ToII && S->getInputIdentifier(I)) return nullptr; Names.push_back(ToII); } ___ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[PATCH] D30831: [ASTImporter] Import fix of GCCAsmStmts w/ missing symbolic operands
gerazo marked an inline comment as done. gerazo added inline comments. Comment at: lib/AST/ASTImporter.cpp:5221 IdentifierInfo *ToII = Importer.Import(S->getOutputIdentifier(I)); -if (!ToII) - return nullptr; +// ToII is nullptr when no symbolic name is given for output operand +// see ParseStmtAsm::ParseAsmOperandsOpt a.sidorin wrote: > In such cases, we should check that FromIdentifier is `nullptr` too (to > detect cases where the result is `nullptr` due to import error). The check > will be still present but will look like: > ``` > if (!ToII && S->getOutputIdentifier()) > return nullptr; > ``` > The same below. Thank you Aleksei, done. https://reviews.llvm.org/D30831 ___ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[PATCH] D27753: [analyzer] alpha.security.DirtyScalar Checker
gerazo created this revision. gerazo added reviewers: zaks.anna, dcoughlin. gerazo added a subscriber: cfe-commits. Herald added a subscriber: mgorny. Checker for catching tainted value usage without proper bound checking. Uses GenericTaintChecker which is also in alpha.security. https://reviews.llvm.org/D27753 Files: include/clang/StaticAnalyzer/Checkers/Checkers.td lib/StaticAnalyzer/Checkers/CMakeLists.txt lib/StaticAnalyzer/Checkers/DirtyScalarChecker.cpp lib/StaticAnalyzer/Checkers/GenericTaintChecker.cpp test/Analysis/dirty-scalar-unbounded.cpp Index: test/Analysis/dirty-scalar-unbounded.cpp === --- /dev/null +++ test/Analysis/dirty-scalar-unbounded.cpp @@ -0,0 +1,79 @@ +// RUN: %clang_cc1 -analyze -analyzer-checker=alpha.security.taint,alpha.security.DirtyScalar -verify -analyzer-config alpha.security.DirtyScalar:criticalOnly=false %s + +#include "Inputs/system-header-simulator.h" + +typedef long ssize_t; + +ssize_t recv(int s, void *buf, size_t len, int flags); + +void gets_tainted_ival(int val) { + (void)val; +} + +void gets_tainted_uval(unsigned int val) { + (void)val; +} + +int tainted_usage() { + int size; + scanf("%d", &size); + char *buff = new char[size]; // expected-warning{{Tainted variable is used without proper bound checking}} + for (int i = 0; i < size; ++i) { // expected-warning{{Tainted variable is used without proper bound checking}} +scanf("%d", &buff[i]); + } + buff[size - 1] = 0; // expected-warning{{Tainted variable is used without proper bound checking}} + *(buff + size - 2) = 0; // expected-warning{{Tainted variable is used without proper bound checking}} + gets_tainted_ival(size); // expected-warning{{Tainted variable is used without proper bound checking}} + + return 0; +} + +int tainted_usage_checked() { + int size; + scanf("%d", &size); + if (size < 0 || size > 255) +return -1; + char *buff = new char[size]; // no warning + for (int i = 0; i < size; ++i) { // no warning +scanf("%d", &buff[i]); // no warning + } + buff[size - 1] = 0; // no warning + *(buff + size - 2) = 0; // no warning + gets_tainted_ival(size); // no warning + + unsigned int idx; + scanf("%d", &idx); + if (idx > 255) +return -1; + gets_tainted_uval(idx); // no warning + + return 0; +} + +int detect_tainted(char const **messages) { + int sock, index; + scanf("%d", &sock); + if (recv(sock, &index, sizeof(index), 0) != sizeof(index)) { // expected-warning{{Tainted variable is used without proper bound checking}} +return -1; + } + int index2 = index; + printf("%s\n", messages[index]); // expected-warning{{Tainted variable is used without proper bound checking}} + printf("%s\n", messages[index2]); // expected-warning{{Tainted variable is used without proper bound checking}} + + return 0; +} + +int skip_sizes_likely_used_for_table_access(char const **messages) { + int sock; + char byte; + + scanf("%d", &sock); + if (recv(sock, &byte, sizeof(byte), 0) != sizeof(byte)) { // expected-warning{{Tainted variable is used without proper bound checking}} +return -1; + } + char byte2 = byte; + printf("%s\n", messages[byte]); // no warning + printf("%s\n", messages[byte2]); // no warning + + return 0; +} Index: lib/StaticAnalyzer/Checkers/GenericTaintChecker.cpp === --- lib/StaticAnalyzer/Checkers/GenericTaintChecker.cpp +++ lib/StaticAnalyzer/Checkers/GenericTaintChecker.cpp @@ -222,6 +222,7 @@ .Case("getline", TaintPropagationRule(2, 0)) .Case("getdelim", TaintPropagationRule(3, 0)) .Case("fgetln", TaintPropagationRule(0, ReturnValueIndex)) +.Case("recv", TaintPropagationRule(InvalidArgIndex, 1, true)) .Default(TaintPropagationRule()); if (!Rule.isNull()) Index: lib/StaticAnalyzer/Checkers/DirtyScalarChecker.cpp === --- /dev/null +++ lib/StaticAnalyzer/Checkers/DirtyScalarChecker.cpp @@ -0,0 +1,221 @@ +//===-- DirtyScalarChecker.cpp *- C++ -*--// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===--===// +// +// Checks for dirty/tainted integers which are used without bound checking. +// By default (criticalOnly == true) reports dirty usage in +// - memcpy, malloc, calloc, strcpy, strncpy, memmove functions +// - array indexing +// - memory allocation with new +// - pointer arithmetic +// otherwise (criticalOnly == false) it also reports usage as +// - function argument +// - loop variable +// +//===--===// + +#include "ClangSACheckers.h" +#include "clang/AST/ExprCXX.h" +#inc