Author: Artem Dergachev Date: 2021-08-26T13:34:29-07:00 New Revision: 73093599287cc6d546ac46652ee781789d7de61f
URL: https://github.com/llvm/llvm-project/commit/73093599287cc6d546ac46652ee781789d7de61f DIFF: https://github.com/llvm/llvm-project/commit/73093599287cc6d546ac46652ee781789d7de61f.diff LOG: [analyzer] Fix scan-build report deduplication. The previous behavior was to deduplicate reports based on md5 of the html file. This algorithm might have worked originally but right now HTML reports contain information rich enough to make them virtually always distinct which breaks deduplication entirely. The new strategy is to (finally) take advantage of IssueHash - the stable report identifier provided by clang that is the same if and only if the reports are duplicates of each other. Additionally, scan-build no longer performs deduplication on its own. Instead, the report file name is now based on the issue hash, and clang instances will silently refuse to produce a new html file when a duplicate already exists. This eliminates the problem entirely. The '-analyzer-config stable-report-filename' option is deprecated because report filenames are no longer unstable. A new option is introduced, '-analyzer-config verbose-report-filename', to produce verbose file names that look similar to the old "stable" file names. The old option acts as an alias to the new option. Differential Revision: https://reviews.llvm.org/D105167 Added: clang/test/Analysis/scan-build/Inputs/deduplication/1.c clang/test/Analysis/scan-build/Inputs/deduplication/2.c clang/test/Analysis/scan-build/Inputs/deduplication/header.h clang/test/Analysis/scan-build/deduplication.test clang/test/Analysis/scan-build/rebuild_index/subdirectory/report-3.html Modified: clang/include/clang/Analysis/PathDiagnostic.h clang/include/clang/StaticAnalyzer/Core/AnalyzerOptions.def clang/include/clang/StaticAnalyzer/Core/AnalyzerOptions.h clang/lib/StaticAnalyzer/Core/HTMLDiagnostics.cpp clang/test/Analysis/analyzer-config.c clang/test/Analysis/scan-build/rebuild_index/rebuild_index.test clang/tools/scan-build/bin/scan-build Removed: clang/test/Analysis/scan-build/rebuild_index/report-3.html clang/test/Analysis/scan-build/rebuild_index/subdirectory/report-4.html ################################################################################ diff --git a/clang/include/clang/Analysis/PathDiagnostic.h b/clang/include/clang/Analysis/PathDiagnostic.h index 04bef1fa5334d..14860c1168e21 100644 --- a/clang/include/clang/Analysis/PathDiagnostic.h +++ b/clang/include/clang/Analysis/PathDiagnostic.h @@ -75,14 +75,8 @@ struct PathDiagnosticConsumerOptions { bool ShouldSerializeStats = false; /// If the consumer intends to produce multiple output files, should it - /// use randomly generated file names for these files (with the tiny risk of - /// having random collisions) or deterministic human-readable file names - /// (with a larger risk of deterministic collisions or invalid characters - /// in the file name). We should not really give this choice to the users - /// because deterministic mode is always superior when done right, but - /// for some consumers this mode is experimental and needs to be - /// off by default. - bool ShouldWriteStableReportFilename = false; + /// use a pseudo-random file name name or a human-readable file name. + bool ShouldWriteVerboseReportFilename = false; /// Whether the consumer should treat consumed diagnostics as hard errors. /// Useful for breaking your build when issues are found. diff --git a/clang/include/clang/StaticAnalyzer/Core/AnalyzerOptions.def b/clang/include/clang/StaticAnalyzer/Core/AnalyzerOptions.def index f0359d2dbb3c2..e97e0a6892a93 100644 --- a/clang/include/clang/StaticAnalyzer/Core/AnalyzerOptions.def +++ b/clang/include/clang/StaticAnalyzer/Core/AnalyzerOptions.def @@ -190,7 +190,13 @@ ANALYZER_OPTION(bool, ShouldReportIssuesInMainSourceFile, false) ANALYZER_OPTION(bool, ShouldWriteStableReportFilename, "stable-report-filename", - "Whether or not the report filename should be random or not.", + "Deprecated: report filenames are now always stable. " + "See also 'verbose-report-filename'.", + false) + +ANALYZER_OPTION(bool, ShouldWriteVerboseReportFilename, "verbose-report-filename", + "Whether or not the report filename should contain extra " + "information about the issue.", false) ANALYZER_OPTION( diff --git a/clang/include/clang/StaticAnalyzer/Core/AnalyzerOptions.h b/clang/include/clang/StaticAnalyzer/Core/AnalyzerOptions.h index ccf35e0a81eca..7514eee7244f8 100644 --- a/clang/include/clang/StaticAnalyzer/Core/AnalyzerOptions.h +++ b/clang/include/clang/StaticAnalyzer/Core/AnalyzerOptions.h @@ -395,7 +395,11 @@ class AnalyzerOptions : public RefCountedBase<AnalyzerOptions> { return {FullCompilerInvocation, ShouldDisplayMacroExpansions, ShouldSerializeStats, - ShouldWriteStableReportFilename, + // The stable report filename option is deprecated because + // file names are now always stable. Now the old option acts as + // an alias to the new verbose filename option because this + // closely mimics the behavior under the old option. + ShouldWriteStableReportFilename || ShouldWriteVerboseReportFilename, AnalyzerWerror, ShouldApplyFixIts, ShouldDisplayCheckerNameForText}; diff --git a/clang/lib/StaticAnalyzer/Core/HTMLDiagnostics.cpp b/clang/lib/StaticAnalyzer/Core/HTMLDiagnostics.cpp index c90046ffb4131..e7df9a70839de 100644 --- a/clang/lib/StaticAnalyzer/Core/HTMLDiagnostics.cpp +++ b/clang/lib/StaticAnalyzer/Core/HTMLDiagnostics.cpp @@ -245,6 +245,18 @@ void HTMLDiagnostics::FlushDiagnosticsImpl( ReportDiag(*Diag, filesMade); } +static llvm::SmallString<32> getIssueHash(const PathDiagnostic &D, + const Preprocessor &PP) { + SourceManager &SMgr = PP.getSourceManager(); + PathDiagnosticLocation UPDLoc = D.getUniqueingLoc(); + FullSourceLoc L(SMgr.getExpansionLoc(UPDLoc.isValid() + ? UPDLoc.asLocation() + : D.getLocation().asLocation()), + SMgr); + return getIssueHash(L, D.getCheckerName(), D.getBugType(), + D.getDeclWithIssue(), PP.getLangOpts()); +} + void HTMLDiagnostics::ReportDiag(const PathDiagnostic& D, FilesMade *filesMade) { // Create the HTML directory if it is missing. @@ -271,11 +283,6 @@ void HTMLDiagnostics::ReportDiag(const PathDiagnostic& D, // Create a new rewriter to generate HTML. Rewriter R(const_cast<SourceManager&>(SMgr), PP.getLangOpts()); - // The file for the first path element is considered the main report file, it - // will usually be equivalent to SMgr.getMainFileID(); however, it might be a - // header when -analyzer-opt-analyze-headers is used. - FileID ReportFile = path.front()->getLocation().asLocation().getExpansionLoc().getFileID(); - // Get the function/method name SmallString<128> declName("unknown"); int offsetDecl = 0; @@ -302,46 +309,52 @@ void HTMLDiagnostics::ReportDiag(const PathDiagnostic& D, // Create a path for the target HTML file. int FD; - SmallString<128> Model, ResultPath; - - if (!DiagOpts.ShouldWriteStableReportFilename) { - llvm::sys::path::append(Model, Directory, "report-%%%%%%.html"); - if (std::error_code EC = - llvm::sys::fs::make_absolute(Model)) { - llvm::errs() << "warning: could not make '" << Model - << "' absolute: " << EC.message() << '\n'; - return; - } - if (std::error_code EC = llvm::sys::fs::createUniqueFile( - Model, FD, ResultPath, llvm::sys::fs::OF_Text)) { - llvm::errs() << "warning: could not create file in '" << Directory - << "': " << EC.message() << '\n'; - return; - } - } else { - int i = 1; - std::error_code EC; - do { - // Find a filename which is not already used - const FileEntry* Entry = SMgr.getFileEntryForID(ReportFile); - std::stringstream filename; - Model = ""; - filename << "report-" - << llvm::sys::path::filename(Entry->getName()).str() - << "-" << declName.c_str() - << "-" << offsetDecl - << "-" << i << ".html"; - llvm::sys::path::append(Model, Directory, - filename.str()); - EC = llvm::sys::fs::openFileForReadWrite( - Model, FD, llvm::sys::fs::CD_CreateNew, llvm::sys::fs::OF_None); - if (EC && EC != llvm::errc::file_exists) { - llvm::errs() << "warning: could not create file '" << Model - << "': " << EC.message() << '\n'; - return; - } - i++; - } while (EC); + + SmallString<128> FileNameStr; + llvm::raw_svector_ostream FileName(FileNameStr); + FileName << "report-"; + + // Historically, neither the stable report filename nor the unstable report + // filename were actually stable. That said, the stable report filename + // was more stable because it was mostly composed of information + // about the bug report instead of being completely random. + // Now both stable and unstable report filenames are in fact stable + // but the stable report filename is still more verbose. + if (DiagOpts.ShouldWriteVerboseReportFilename) { + // FIXME: This code relies on knowing what constitutes the issue hash. + // Otherwise deduplication won't work correctly. + FileID ReportFile = + path.back()->getLocation().asLocation().getExpansionLoc().getFileID(); + + const FileEntry *Entry = SMgr.getFileEntryForID(ReportFile); + + FileName << llvm::sys::path::filename(Entry->getName()).str() << "-" + << declName.c_str() << "-" << offsetDecl << "-"; + } + + FileName << StringRef(getIssueHash(D, PP)).substr(0, 6).str() << ".html"; + + SmallString<128> ResultPath; + llvm::sys::path::append(ResultPath, Directory, FileName.str()); + if (std::error_code EC = llvm::sys::fs::make_absolute(ResultPath)) { + llvm::errs() << "warning: could not make '" << ResultPath + << "' absolute: " << EC.message() << '\n'; + return; + } + + if (std::error_code EC = llvm::sys::fs::openFileForReadWrite( + ResultPath, FD, llvm::sys::fs::CD_CreateNew, + llvm::sys::fs::OF_None)) { + // Existence of the file corresponds to the situation where a diff erent + // Clang instance has emitted a bug report with the same issue hash. + // This is an entirely normal situation that does not deserve a warning, + // as apart from hash collisions this can happen because the reports + // are in fact similar enough to be considered duplicates of each other. + if (EC != llvm::errc::file_exists) { + llvm::errs() << "warning: could not create file in '" << Directory + << "': " << EC.message() << '\n'; + } + return; } llvm::raw_fd_ostream os(FD, true); @@ -638,7 +651,6 @@ void HTMLDiagnostics::FinalizeHTML(const PathDiagnostic& D, Rewriter &R, ? UPDLoc.asLocation() : D.getLocation().asLocation()), SMgr); - const Decl *DeclWithIssue = D.getDeclWithIssue(); StringRef BugCategory = D.getCategory(); if (!BugCategory.empty()) @@ -650,9 +662,7 @@ void HTMLDiagnostics::FinalizeHTML(const PathDiagnostic& D, Rewriter &R, os << "\n<!-- FUNCTIONNAME " << declName << " -->\n"; - os << "\n<!-- ISSUEHASHCONTENTOFLINEINCONTEXT " - << getIssueHash(L, D.getCheckerName(), D.getBugType(), DeclWithIssue, - PP.getLangOpts()) + os << "\n<!-- ISSUEHASHCONTENTOFLINEINCONTEXT " << getIssueHash(D, PP) << " -->\n"; os << "\n<!-- BUGLINE " diff --git a/clang/test/Analysis/analyzer-config.c b/clang/test/Analysis/analyzer-config.c index 2a41e692dd59b..c9f9b438bfbb7 100644 --- a/clang/test/Analysis/analyzer-config.c +++ b/clang/test/Analysis/analyzer-config.c @@ -119,4 +119,5 @@ // CHECK-NEXT: unix.DynamicMemoryModeling:AddNoOwnershipChangeNotes = false // CHECK-NEXT: unix.DynamicMemoryModeling:Optimistic = false // CHECK-NEXT: unroll-loops = false +// CHECK-NEXT: verbose-report-filename = false // CHECK-NEXT: widen-loops = false diff --git a/clang/test/Analysis/scan-build/Inputs/deduplication/1.c b/clang/test/Analysis/scan-build/Inputs/deduplication/1.c new file mode 100644 index 0000000000000..ec2f9ec95e100 --- /dev/null +++ b/clang/test/Analysis/scan-build/Inputs/deduplication/1.c @@ -0,0 +1,5 @@ +#include "header.h" + +void bar() { + foo(); +} diff --git a/clang/test/Analysis/scan-build/Inputs/deduplication/2.c b/clang/test/Analysis/scan-build/Inputs/deduplication/2.c new file mode 100644 index 0000000000000..ec2f9ec95e100 --- /dev/null +++ b/clang/test/Analysis/scan-build/Inputs/deduplication/2.c @@ -0,0 +1,5 @@ +#include "header.h" + +void bar() { + foo(); +} diff --git a/clang/test/Analysis/scan-build/Inputs/deduplication/header.h b/clang/test/Analysis/scan-build/Inputs/deduplication/header.h new file mode 100644 index 0000000000000..e94e65d6b4e0b --- /dev/null +++ b/clang/test/Analysis/scan-build/Inputs/deduplication/header.h @@ -0,0 +1,4 @@ +int foo() { + int x = 0; + return 1 / x; +} diff --git a/clang/test/Analysis/scan-build/deduplication.test b/clang/test/Analysis/scan-build/deduplication.test new file mode 100644 index 0000000000000..56d888e5fc12a --- /dev/null +++ b/clang/test/Analysis/scan-build/deduplication.test @@ -0,0 +1,40 @@ +// FIXME: Actually, "perl". +REQUIRES: shell + +RUN: rm -rf %t.output_dir && mkdir %t.output_dir +RUN: %scan-build -o %t.output_dir \ +RUN: %clang -S %S/Inputs/deduplication/1.c \ +RUN: %S/Inputs/deduplication/2.c \ +RUN: | FileCheck %s -check-prefix CHECK-STDOUT + +RUN: ls %t.output_dir/*/ | FileCheck %s -check-prefix CHECK-FILENAMES + +RUN: rm -rf %t.output_dir && mkdir %t.output_dir +RUN: %scan-build -o %t.output_dir \ +RUN: -analyzer-config stable-report-filename=true \ +RUN: %clang -S %S/Inputs/deduplication/1.c \ +RUN: %S/Inputs/deduplication/2.c \ +RUN: | FileCheck %s -check-prefix CHECK-STDOUT + +RUN: ls %t.output_dir/*/ | FileCheck %s -check-prefix CHECK-FILENAMES + +RUN: rm -rf %t.output_dir && mkdir %t.output_dir +RUN: %scan-build -o %t.output_dir \ +RUN: -analyzer-config verbose-report-filename=true \ +RUN: %clang -S %S/Inputs/deduplication/1.c \ +RUN: %S/Inputs/deduplication/2.c \ +RUN: | FileCheck %s -check-prefix CHECK-STDOUT + +RUN: ls %t.output_dir/*/ | FileCheck %s -check-prefix CHECK-FILENAMES + +// Only one report file should be generated. + +CHECK-STDOUT: scan-build: Using '{{.*}}' for static analysis +CHECK-STDOUT: scan-build: 1 bug found. +CHECK-STDOUT: scan-build: Run 'scan-view {{.*}}' to examine bug reports. + + +CHECK-FILENAMES: index.html +CHECK-FILENAMES-NEXT: report-{{.*}}.html +CHECK-FILENAMES-NEXT: scanview.css +CHECK-FILENAMES-NEXT: sorttable.js diff --git a/clang/test/Analysis/scan-build/rebuild_index/rebuild_index.test b/clang/test/Analysis/scan-build/rebuild_index/rebuild_index.test index df07a2618d498..ab70435c60542 100644 --- a/clang/test/Analysis/scan-build/rebuild_index/rebuild_index.test +++ b/clang/test/Analysis/scan-build/rebuild_index/rebuild_index.test @@ -4,9 +4,8 @@ REQUIRES: shell RUN: rm -rf %t.output_dir && mkdir %t.output_dir RUN: cp %S/report-1.html %t.output_dir RUN: cp %S/report-2.html %t.output_dir -RUN: cp %S/report-3.html %t.output_dir RUN: mkdir %t.output_dir/subdirectory -RUN: cp %S/subdirectory/report-4.html %t.output_dir/subdirectory +RUN: cp %S/subdirectory/report-3.html %t.output_dir/subdirectory RUN: %scan-build --generate-index-only %t.output_dir @@ -15,16 +14,13 @@ RUN: ls %t.output_dir | FileCheck -check-prefix CHECK-FILES %s CHECK-FILES: index.html CHECK-FILES-NEXT: report-1.html CHECK-FILES-NEXT: report-2.html - -// report-3.html is a duplicate of report-1.html so it's not present. -CHECK-FILES-NOT: report-3.html CHECK-FILES-NEXT: scanview.css CHECK-FILES-NEXT: sorttable.js CHECK-FILES-NEXT: subdirectory RUN: ls %t.output_dir/subdirectory | FileCheck -check-prefix CHECK-SUB %s -CHECK-SUB: report-4.html +CHECK-SUB: report-3.html RUN: cat %t.output_dir/index.html | FileCheck -check-prefix CHECK-INDEX %s @@ -32,10 +28,9 @@ CHECK-INDEX: cat1 CHECK-INDEX-NEXT: bug1 CHECK-INDEX-NEXT: cat2 CHECK-INDEX-NEXT: bug2 -CHECK-INDEX-NEXT: cat4 -CHECK-INDEX-NEXT: bug4 +CHECK-INDEX-NEXT: cat3 +CHECK-INDEX-NEXT: bug3 CHECK-INDEX: report-1.html#EndPath CHECK-INDEX: report-2.html#EndPath -CHECK-INDEX-NOT: report-3.html#EndPath -CHECK-INDEX: subdirectory/report-4.html#EndPath +CHECK-INDEX: subdirectory/report-3.html#EndPath diff --git a/clang/test/Analysis/scan-build/rebuild_index/report-3.html b/clang/test/Analysis/scan-build/rebuild_index/report-3.html deleted file mode 100644 index ba446ae51ff9b..0000000000000 --- a/clang/test/Analysis/scan-build/rebuild_index/report-3.html +++ /dev/null @@ -1,8 +0,0 @@ -<!-- BUGTYPE bug1 --> -<!-- BUGFILE file1 --> -<!-- BUGPATHLENGTH 1 --> -<!-- BUGLINE 1 --> -<!-- BUGCATEGORY cat1 --> -<!-- BUGDESC desc1 --> -<!-- FUNCTIONNAME func1 --> -<!-- BUGMETAEND --> diff --git a/clang/test/Analysis/scan-build/rebuild_index/subdirectory/report-3.html b/clang/test/Analysis/scan-build/rebuild_index/subdirectory/report-3.html new file mode 100644 index 0000000000000..63ad78536959d --- /dev/null +++ b/clang/test/Analysis/scan-build/rebuild_index/subdirectory/report-3.html @@ -0,0 +1,8 @@ +<!-- BUGTYPE bug3 --> +<!-- BUGFILE file3 --> +<!-- BUGPATHLENGTH 3 --> +<!-- BUGLINE 3 --> +<!-- BUGCATEGORY cat3 --> +<!-- BUGDESC desc3 --> +<!-- FUNCTIONNAME func3 --> +<!-- BUGMETAEND --> diff --git a/clang/test/Analysis/scan-build/rebuild_index/subdirectory/report-4.html b/clang/test/Analysis/scan-build/rebuild_index/subdirectory/report-4.html deleted file mode 100644 index 63c7e28c2d16c..0000000000000 --- a/clang/test/Analysis/scan-build/rebuild_index/subdirectory/report-4.html +++ /dev/null @@ -1,8 +0,0 @@ -<!-- BUGTYPE bug4 --> -<!-- BUGFILE file4 --> -<!-- BUGPATHLENGTH 4 --> -<!-- BUGLINE 4 --> -<!-- BUGCATEGORY cat4 --> -<!-- BUGDESC desc4 --> -<!-- FUNCTIONNAME func4 --> -<!-- BUGMETAEND --> diff --git a/clang/tools/scan-build/bin/scan-build b/clang/tools/scan-build/bin/scan-build index 645f5507b6fa0..71e7ced087199 100755 --- a/clang/tools/scan-build/bin/scan-build +++ b/clang/tools/scan-build/bin/scan-build @@ -14,7 +14,6 @@ use strict; use warnings; use FindBin qw($RealBin); -use Digest::MD5; use File::Basename; use File::Find; use File::Copy qw(copy); @@ -268,27 +267,6 @@ sub SetHtmlEnv { $ENV{'CCC_ANALYZER_HTML'} = $Dir; } -##----------------------------------------------------------------------------## -# ComputeDigest - Compute a digest of the specified file. -##----------------------------------------------------------------------------## - -sub ComputeDigest { - my $FName = shift; - DieDiag("Cannot read $FName to compute Digest.\n") if (! -r $FName); - - # Use Digest::MD5. We don't have to be cryptographically secure. We're - # just looking for duplicate files that come from a non-malicious source. - # We use Digest::MD5 because it is a standard Perl module that should - # come bundled on most systems. - open(FILE, $FName) or DieDiag("Cannot open $FName when computing Digest.\n"); - binmode FILE; - my $Result = Digest::MD5->new->addfile(*FILE)->hexdigest; - close(FILE); - - # Return the digest. - return $Result; -} - ##----------------------------------------------------------------------------## # UpdatePrefix - Compute the common prefix of files. ##----------------------------------------------------------------------------## @@ -374,8 +352,6 @@ sub AddStatLine { # Sometimes a source file is scanned more than once, and thus produces # multiple error reports. We use a cache to solve this problem. -my %AlreadyScanned; - sub ScanFile { my $Index = shift; @@ -383,19 +359,6 @@ sub ScanFile { my $FName = shift; my $Stats = shift; - # Compute a digest for the report file. Determine if we have already - # scanned a file that looks just like it. - - my $digest = ComputeDigest("$Dir/$FName"); - - if (defined $AlreadyScanned{$digest}) { - # Redundant file. Remove it. - unlink("$Dir/$FName"); - return; - } - - $AlreadyScanned{$digest} = 1; - # At this point the report file is not world readable. Make it happen. chmod(0644, "$Dir/$FName"); _______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits