https://github.com/Discookie created https://github.com/llvm/llvm-project/pull/84166
This checker implements a null-pointer analysis checker using the data-flow framework. In particular, the checker finds cases where the pointer's nullability is checked after it has been dereferenced: ``` int f(int *ptr) { *ptr += 20; // note: one of the locations where the pointer's value cannot be null if (ptr) { *ptr += 42; // warning: pointer value is checked even though it cannot be null at this point return *ptr; } return 0; ``` It currently recognizes the following operations: - Dereference and arrow operators - Pointer-to-boolean cast - Assigning &obj, nullptr and other values --- Notable limitations: The checker only supports C++ due to the limitations of the framework (llvm/llvm-project#65301). Function calls taking a reference of a pointer are not handled yet. ``void external(int *&ref);`` The note tag is only displayed at one location, and not all operations are supported or displayed. More examples and limitations in the [checker docs](https://github.com/Discookie/llvm-project/blob/reverse-null/clang-tools-extra/docs/clang-tidy/checks/bugprone/null-check-after-dereference.rst). --- Results on open-source projects: https://codechecker-demo.eastus.cloudapp.azure.com/Default/runs?run=Discookie_reverse-null The reference-of-ptr limitation leads to a class of false positives across the tested projects [(example)](https://codechecker-demo.eastus.cloudapp.azure.com/Default/report-detail?run=libwebm_libwebm-1.0.0.27_Discookie_reverse-null&detection-status=New&report-id=3497414&report-hash=e75b81fcd825616e0ec904ffef1c99ac&report-filepath=%2Flocal%2Fpersistent_docker%2FCSA-measurements-driver-2923%2Fmeasurements_workspace%2Flibwebm%2Fmkvparser.cpp). But the checker also found quite a few true positives, on LLVM: [1](https://codechecker-demo.eastus.cloudapp.azure.com/Default/report-detail?run=llvm-project_llvmorg-12.0.0_Discookie_reverse-null&report-id=3503712&report-hash=63f82f7e62ecfea1c53c1896c29b2ef2&report-filepath=%2Flocal%2Fpersistent_docker%2FCSA-measurements-driver-2923%2Fmeasurements_workspace%2Fllvm-project%2Fclang%2Finclude%2Fclang%2FBasic%2FDiagnostic.h&is-unique=off&diff-type=New&review-status=Confirmed%20bug) [2](https://codechecker-demo.eastus.cloudapp.azure.com/Default/report-detail?run=%2aDiscookie%2a&detection-status=New&report-id=3503771&report-hash=1f5c708ee2cfe736068e8f8a3fdc8634&report-filepath=%2Flocal%2Fpersistent_docker%2FCSA-measurements-driver-2923%2Fmeasurements_workspace%2Fllvm-project%2Fclang%2Flib%2FAnalysis%2FCalledOnceCheck.cpp&page=3&is-unique=off&diff-type=New&review-status=Unreviewed) [3](https://codechecker-demo.eastus.cloudapp.azure.com/Default/report-detail?run=%2aDiscookie%2a&detection-status=New&report-id=3503770&report-hash=53c9082086ce7d320cf5e5aab78dbab6&report-filepath=%2Flocal%2Fpersistent_docker%2FCSA-measurements-driver-2923%2Fmeasurements_workspace%2Fllvm-project%2Fllvm%2Flib%2FMC%2FMCSymbol.cpp&page=3&is-unique=off&diff-type=New&review-status=Unreviewed) and a couple more, listed [here](https://github.com/Discookie/llvm-project/pull/1#issuecomment-1979214081). --- This checker corresponds to the the CERT rule [EXP34-C, Do not dereference null pointers](https://wiki.sei.cmu.edu/confluence/display/c/EXP34-C.+Do+not+dereference+null+pointers#EXP34C.Donotdereferencenullpointers-NoncompliantCodeExample.2), example 3. >From cf75a856dd75a87c7d2e9f307c206f4283e8b8f7 Mon Sep 17 00:00:00 2001 From: Viktor Cseh <viktor.c...@ericsson.com> Date: Tue, 28 Nov 2023 06:11:55 +0000 Subject: [PATCH] [clang][dataflow] Add null-check after dereference checker --- .../bugprone/BugproneTidyModule.cpp | 3 + .../clang-tidy/bugprone/CMakeLists.txt | 1 + .../NullCheckAfterDereferenceCheck.cpp | 175 +++++ .../bugprone/NullCheckAfterDereferenceCheck.h | 37 ++ clang-tools-extra/clangd/TidyProvider.cpp | 3 +- .../bugprone/null-check-after-dereference.rst | 158 +++++ .../bugprone/null-check-after-dereference.cpp | 330 +++++++++ .../Models/NullPointerAnalysisModel.h | 112 ++++ .../FlowSensitive/Models/CMakeLists.txt | 1 + .../Models/NullPointerAnalysisModel.cpp | 629 ++++++++++++++++++ .../Analysis/FlowSensitive/CMakeLists.txt | 1 + .../NullPointerAnalysisModelTest.cpp | 336 ++++++++++ 12 files changed, 1785 insertions(+), 1 deletion(-) create mode 100644 clang-tools-extra/clang-tidy/bugprone/NullCheckAfterDereferenceCheck.cpp create mode 100644 clang-tools-extra/clang-tidy/bugprone/NullCheckAfterDereferenceCheck.h create mode 100644 clang-tools-extra/docs/clang-tidy/checks/bugprone/null-check-after-dereference.rst create mode 100644 clang-tools-extra/test/clang-tidy/checkers/bugprone/null-check-after-dereference.cpp create mode 100644 clang/include/clang/Analysis/FlowSensitive/Models/NullPointerAnalysisModel.h create mode 100644 clang/lib/Analysis/FlowSensitive/Models/NullPointerAnalysisModel.cpp create mode 100644 clang/unittests/Analysis/FlowSensitive/NullPointerAnalysisModelTest.cpp diff --git a/clang-tools-extra/clang-tidy/bugprone/BugproneTidyModule.cpp b/clang-tools-extra/clang-tidy/bugprone/BugproneTidyModule.cpp index a8a23b045f80bb..ddd708dd513355 100644 --- a/clang-tools-extra/clang-tidy/bugprone/BugproneTidyModule.cpp +++ b/clang-tools-extra/clang-tidy/bugprone/BugproneTidyModule.cpp @@ -48,6 +48,7 @@ #include "NoEscapeCheck.h" #include "NonZeroEnumToBoolConversionCheck.h" #include "NotNullTerminatedResultCheck.h" +#include "NullCheckAfterDereferenceCheck.h" #include "OptionalValueConversionCheck.h" #include "ParentVirtualCallCheck.h" #include "PosixReturnCheck.h" @@ -180,6 +181,8 @@ class BugproneModule : public ClangTidyModule { CheckFactories.registerCheck<PosixReturnCheck>("bugprone-posix-return"); CheckFactories.registerCheck<ReservedIdentifierCheck>( "bugprone-reserved-identifier"); + CheckFactories.registerCheck<NullCheckAfterDereferenceCheck>( + "bugprone-null-check-after-dereference"); CheckFactories.registerCheck<SharedPtrArrayMismatchCheck>( "bugprone-shared-ptr-array-mismatch"); CheckFactories.registerCheck<SignalHandlerCheck>("bugprone-signal-handler"); diff --git a/clang-tools-extra/clang-tidy/bugprone/CMakeLists.txt b/clang-tools-extra/clang-tidy/bugprone/CMakeLists.txt index 1cd6fb207d7625..5dbe761cb810e7 100644 --- a/clang-tools-extra/clang-tidy/bugprone/CMakeLists.txt +++ b/clang-tools-extra/clang-tidy/bugprone/CMakeLists.txt @@ -44,6 +44,7 @@ add_clang_library(clangTidyBugproneModule NoEscapeCheck.cpp NonZeroEnumToBoolConversionCheck.cpp NotNullTerminatedResultCheck.cpp + NullCheckAfterDereferenceCheck.cpp OptionalValueConversionCheck.cpp ParentVirtualCallCheck.cpp PosixReturnCheck.cpp diff --git a/clang-tools-extra/clang-tidy/bugprone/NullCheckAfterDereferenceCheck.cpp b/clang-tools-extra/clang-tidy/bugprone/NullCheckAfterDereferenceCheck.cpp new file mode 100644 index 00000000000000..1ba4edc1eaf0e4 --- /dev/null +++ b/clang-tools-extra/clang-tidy/bugprone/NullCheckAfterDereferenceCheck.cpp @@ -0,0 +1,175 @@ +//===--- NullCheckAfterDereferenceCheck.cpp - clang-tidy-------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "NullCheckAfterDereferenceCheck.h" +#include "clang/AST/ASTContext.h" +#include "clang/AST/DeclCXX.h" +#include "clang/AST/DeclTemplate.h" +#include "clang/ASTMatchers/ASTMatchFinder.h" +#include "clang/ASTMatchers/ASTMatchers.h" +#include "clang/Analysis/CFG.h" +#include "clang/Analysis/FlowSensitive/ControlFlowContext.h" +#include "clang/Analysis/FlowSensitive/DataflowAnalysisContext.h" +#include "clang/Analysis/FlowSensitive/DataflowEnvironment.h" +#include "clang/Analysis/FlowSensitive/DataflowLattice.h" +#include "clang/Analysis/FlowSensitive/Models/NullPointerAnalysisModel.h" +#include "clang/Analysis/FlowSensitive/WatchedLiteralsSolver.h" +#include "clang/Basic/SourceLocation.h" +#include "llvm/ADT/Any.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/Support/Error.h" +#include <memory> +#include <vector> + +namespace clang::tidy::bugprone { + +using ast_matchers::MatchFinder; +using dataflow::NullPointerAnalysisModel; +using dataflow::NullCheckAfterDereferenceDiagnoser; + +static constexpr llvm::StringLiteral FuncID("fun"); + +struct ExpandedResult { + SourceLocation WarningLoc; + std::optional<SourceLocation> DerefLoc; +}; + +using ExpandedResultType = std::pair<std::vector<ExpandedResult>, + std::vector<ExpandedResult>>; + +static std::optional<ExpandedResultType> analyzeFunction( + const FunctionDecl &FuncDecl) { + using dataflow::ControlFlowContext; + using dataflow::DataflowAnalysisState; + using llvm::Expected; + + ASTContext &ASTCtx = FuncDecl.getASTContext(); + + if (FuncDecl.getBody() == nullptr) { + return std::nullopt; + } + + Expected<ControlFlowContext> Context = + ControlFlowContext::build(FuncDecl, *FuncDecl.getBody(), ASTCtx); + if (!Context) + return std::nullopt; + + dataflow::DataflowAnalysisContext AnalysisContext( + std::make_unique<dataflow::WatchedLiteralsSolver>()); + dataflow::Environment Env(AnalysisContext, FuncDecl); + NullPointerAnalysisModel Analysis(ASTCtx); + NullCheckAfterDereferenceDiagnoser Diagnoser; + NullCheckAfterDereferenceDiagnoser::ResultType Diagnostics; + + using LatticeState = DataflowAnalysisState<NullPointerAnalysisModel::Lattice>; + using DetailMaybeLatticeStates = std::vector<std::optional<LatticeState>>; + + auto DiagnoserImpl = [&ASTCtx, &Diagnoser, &Diagnostics]( + const CFGElement &Elt, + const LatticeState &S) mutable -> void { + auto EltDiagnostics = Diagnoser.diagnose(ASTCtx, &Elt, S.Env); + llvm::move(EltDiagnostics.first, + std::back_inserter(Diagnostics.first)); + llvm::move(EltDiagnostics.second, + std::back_inserter(Diagnostics.second)); + }; + + Expected<DetailMaybeLatticeStates> + BlockToOutputState = dataflow::runDataflowAnalysis( + *Context, Analysis, Env, DiagnoserImpl); + + if (llvm::Error E = BlockToOutputState.takeError()) { + llvm::dbgs() << "Dataflow analysis failed: " << llvm::toString(std::move(E)) + << ".\n"; + return std::nullopt; + } + + ExpandedResultType ExpandedDiagnostics; + + llvm::transform(Diagnostics.first, + std::back_inserter(ExpandedDiagnostics.first), + [&](SourceLocation WarningLoc) -> ExpandedResult { + if (auto Val = Diagnoser.WarningLocToVal[WarningLoc]; + auto DerefExpr = Diagnoser.ValToDerefLoc[Val]) { + return {WarningLoc, DerefExpr->getBeginLoc()}; + } + + return {WarningLoc, std::nullopt}; + }); + + llvm::transform(Diagnostics.second, + std::back_inserter(ExpandedDiagnostics.second), + [&](SourceLocation WarningLoc) -> ExpandedResult { + if (auto Val = Diagnoser.WarningLocToVal[WarningLoc]; + auto DerefExpr = Diagnoser.ValToDerefLoc[Val]) { + return {WarningLoc, DerefExpr->getBeginLoc()}; + } + + return {WarningLoc, std::nullopt}; + }); + + return ExpandedDiagnostics; +} + +void NullCheckAfterDereferenceCheck::registerMatchers(MatchFinder *Finder) { + using namespace ast_matchers; + + auto hasPointerValue = + hasDescendant(NullPointerAnalysisModel::ptrValueMatcher()); + Finder->addMatcher( + decl(anyOf(functionDecl(unless(isExpansionInSystemHeader()), + // FIXME: Remove the filter below when lambdas are + // well supported by the check. + unless(hasDeclContext(cxxRecordDecl(isLambda()))), + hasBody(hasPointerValue)), + cxxConstructorDecl(hasAnyConstructorInitializer( + withInitializer(hasPointerValue))))) + .bind(FuncID), + this); +} + +void NullCheckAfterDereferenceCheck::check( + const MatchFinder::MatchResult &Result) { + if (Result.SourceManager->getDiagnostics().hasUncompilableErrorOccurred()) + return; + + const auto *FuncDecl = Result.Nodes.getNodeAs<FunctionDecl>(FuncID); + assert(FuncDecl && "invalid FuncDecl matcher"); + if (FuncDecl->isTemplated()) + return; + + if (const auto Diagnostics = analyzeFunction(*FuncDecl)) { + const auto& [CheckWhenNullLocations, CheckAfterDereferenceLocations] = + *Diagnostics; + + for (const auto [WarningLoc, DerefLoc] : CheckAfterDereferenceLocations) { + diag(WarningLoc, + "pointer value is checked even though " + "it cannot be null at this point"); + + if (DerefLoc) { + diag(*DerefLoc, + "one of the locations where the pointer's value cannot be null", + DiagnosticIDs::Note); + } + } + + for (const auto [WarningLoc, DerefLoc] : CheckWhenNullLocations) { + diag(WarningLoc, + "pointer value is checked but it can only be null at this point"); + + if (DerefLoc) { + diag(*DerefLoc, + "one of the locations where the pointer's value can only be null", + DiagnosticIDs::Note); + } + } + } +} + +} // namespace clang::tidy::bugprone diff --git a/clang-tools-extra/clang-tidy/bugprone/NullCheckAfterDereferenceCheck.h b/clang-tools-extra/clang-tidy/bugprone/NullCheckAfterDereferenceCheck.h new file mode 100644 index 00000000000000..e5ac27e79deb11 --- /dev/null +++ b/clang-tools-extra/clang-tidy/bugprone/NullCheckAfterDereferenceCheck.h @@ -0,0 +1,37 @@ +//===--- NullCheckAfterDereferenceCheck.h - clang-tidy ----------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_BUGPRONE_NULLCHECKAFTERDEREFERENCECHECK_H +#define LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_BUGPRONE_NULLCHECKAFTERDEREFERENCECHECK_H + +#include "../ClangTidyCheck.h" +#include "clang/ASTMatchers/ASTMatchFinder.h" + +namespace clang::tidy::bugprone { + +/// Finds checks for pointer nullability after a pointer has already been +/// dereferenced, using the data-flow framework. +/// +/// For the user-facing documentation see: +/// http://clang.llvm.org/extra/clang-tidy/checks/bugprone/null-check-after-dereference.html +class NullCheckAfterDereferenceCheck : public ClangTidyCheck { +public: + NullCheckAfterDereferenceCheck(StringRef Name, ClangTidyContext *Context) + : ClangTidyCheck(Name, Context) {} + + // The data-flow framework does not support C because of AST differences. + bool isLanguageVersionSupported(const LangOptions &LangOpts) const override { + return LangOpts.CPlusPlus; + } + void registerMatchers(ast_matchers::MatchFinder *Finder) override; + void check(const ast_matchers::MatchFinder::MatchResult &Result) override; +}; + +} // namespace clang::tidy::bugprone + +#endif // LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_BUGPRONE_NULLCHECKAFTERDEREFERENCECHECK_H diff --git a/clang-tools-extra/clangd/TidyProvider.cpp b/clang-tools-extra/clangd/TidyProvider.cpp index b658a80559937c..cf7b4dff16070b 100644 --- a/clang-tools-extra/clangd/TidyProvider.cpp +++ b/clang-tools-extra/clangd/TidyProvider.cpp @@ -219,9 +219,10 @@ TidyProvider disableUnusableChecks(llvm::ArrayRef<std::string> ExtraBadChecks) { "-bugprone-use-after-move", // Alias for bugprone-use-after-move. "-hicpp-invalid-access-moved", - // Check uses dataflow analysis, which might hang/crash unexpectedly on + // Checks use dataflow analysis, which might hang/crash unexpectedly on // incomplete code. "-bugprone-unchecked-optional-access", + "-bugprone-null-check-after-dereference", // ----- Performance problems ----- diff --git a/clang-tools-extra/docs/clang-tidy/checks/bugprone/null-check-after-dereference.rst b/clang-tools-extra/docs/clang-tidy/checks/bugprone/null-check-after-dereference.rst new file mode 100644 index 00000000000000..90e8897d7817ed --- /dev/null +++ b/clang-tools-extra/docs/clang-tidy/checks/bugprone/null-check-after-dereference.rst @@ -0,0 +1,158 @@ +.. title:: clang-tidy - bugprone-null-check-after-dereference + +bugprone-null-check-after-dereference +===================================== + +.. note:: + + This check uses a flow-sensitive static analysis to produce its + results. Therefore, it may be more resource intensive (RAM, CPU) than the + average clang-tidy check. + +This check identifies redundant pointer null-checks, by finding cases where the +pointer cannot be null at the location of the null-check. + +Redundant null-checks can signal faulty assumptions about the current value of +a pointer at different points in the program. Either the null-check is +redundant, or there could be a null-pointer dereference earlier in the program. + +.. code-block:: c++ + + int f(int *ptr) { + *ptr = 20; // note: one of the locations where the pointer's value cannot be null + // ... + if (ptr) { // bugprone: pointer is checked even though it cannot be null at this point + return *ptr; + } + return 0; + } + +Supported pointer operations +~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Pointer null-checks +------------------- + +The checker currently supports null-checks on pointers that use +``operator bool``, such as when being used as the condition +for an `if` statement. + +.. code-block:: c++ + + int f(int *ptr) { + if (ptr) { + if (ptr) { // bugprone: pointer is re-checked after its null-ness is already checked. + return *ptr; + } + + return ptr ? *ptr : 0; // bugprone: pointer is re-checked after its null-ness is already checked. + } + return 0; + } + +Pointer dereferences +-------------------- + +Pointer star- and arrow-dereferences are supported. + +.. code-block:: c++ + + struct S { + int val; + }; + + void f(int *ptr, S *wrapper) { + *ptr = 20; + wrapper->val = 15; + } + +Null-pointer and other value assignments +---------------------------------------- + +The checker supports assigning various values to pointers, making them *null* +or *non-null*. The checker also supports passing pointers of a pointer to +external functions. + +.. code-block:: c++ + + extern int *external(); + extern void refresh(int **ptr_ptr); + + int f() { + int *ptr_null = nullptr; + if (ptr_null) { // bugprone: pointer is checked where it cannot be non-null. + return *ptr_null; + } + + int *ptr = external(); + if (ptr) { // safe: external() could return either nullable or nonnull pointers. + return *ptr; + } + + int *ptr2 = external(); + *ptr2 = 20; + refresh(&ptr2); + if (ptr2) { // safe: pointer could be changed by refresh(). + return *ptr2; + } + return 0; + } + +Limitations +~~~~~~~~~~~ + +The check only supports C++ due to limitations in the data-flow framework. + +The check currently does not output the locations of the previous +dereferences, even in simple cases. + +The annotations ``_nullable`` and ``_nonnull`` are not supported. + +.. code-block:: c++ + + extern int *_nonnull external_nonnull(); + + int annotations() { + int *ptr = external_nonnull(); + + return ptr ? *ptr : 0; // false-negative: pointer is known to be non-null. + } + +Function calls taking a pointer value as a reference or a pointer-to-pointer are not supported. + +.. code-block:: c++ + extern int *external(); + extern void refresh_ref(int *&ptr); + extern void refresh_ptr(int **ptr); + + int extern_ref() { + int *ptr = external(); + *ptr = 20; + + refresh_ref(ptr); + refresh_ptr(&ptr); + + return ptr ? *ptr : 0; // false-positive: pointer could be changed by refresh_ref(). + } + +Note tags are currently appended to a single location, even if all paths ensure a pointer is not null. + +.. code-block:: c++ + int branches(int *p, bool b) { + if (b) { + *p = 42; // true-positive: note-tag appended here + } else { + *p = 20; // false-positive: note tag not appended here + } + + return ptr ? *ptr : 0; + } + +Declarations and some other operations are not supported by note tags yet. This can sometimes result in erroneous note tags being shown instead of the correct one. + +.. code-block:: c++ + int note_tags() { + int *ptr = nullptr; // false-negative: note tag not shown + + return ptr ? *ptr : 0; + } diff --git a/clang-tools-extra/test/clang-tidy/checkers/bugprone/null-check-after-dereference.cpp b/clang-tools-extra/test/clang-tidy/checkers/bugprone/null-check-after-dereference.cpp new file mode 100644 index 00000000000000..21e9eff4290f7a --- /dev/null +++ b/clang-tools-extra/test/clang-tidy/checkers/bugprone/null-check-after-dereference.cpp @@ -0,0 +1,330 @@ +// RUN: %check_clang_tidy %s bugprone-null-check-after-dereference %t + +struct S { + int a; +}; + +int warning_deref(int *p) { + *p = 42; + + if (p) { + // CHECK-MESSAGES: :[[@LINE-1]]:7: warning: pointer value is checked even though it cannot be null at this point [bugprone-null-check-after-dereference] + // CHECK-MESSAGES: :[[@LINE-4]]:3: note: one of the locations where the pointer's value cannot be null + // FIXME: If there's a direct path, make the error message more precise, ie. remove `one of the locations` + *p += 20; + return *p; + } else { + return 0; + } +} + +int warning_member(S *q) { + q->a = 42; + + if (q) { + // CHECK-MESSAGES: :[[@LINE-1]]:7: warning: pointer value is checked even though it cannot be null at this point + // CHECK-MESSAGES: :[[@LINE-4]]:3: note: one of the locations where the pointer's value cannot be null + q->a += 20; + return q->a; + } else { + return 0; + } +} + +int negative_warning(int *p) { + *p = 42; + + if (!p) { + // CHECK-MESSAGES: :[[@LINE-1]]:8: warning: pointer value is checked even though it cannot be null at this point + // CHECK-MESSAGES: :[[@LINE-4]]:3: note: one of the locations where the pointer's value cannot be null + return 0; + } else { + *p += 20; + return *p; + } +} + +int no_warning(int *p, bool b) { + if (b) { + *p = 42; + } + + if (p) { + // CHECK-MESSAGES-NOT: :[[@LINE-1]]:7: warning: pointer value is checked even though it cannot be null at this point + *p += 20; + return *p; + } else { + return 0; + } +} + +int else_branch_warning(int *p, bool b) { + if (b) { + *p = 42; + } else { + *p = 20; + } + + if (p) { + // CHECK-MESSAGES: :[[@LINE-1]]:7: warning: pointer value is checked even though it cannot be null at this point + // CHECK-MESSAGES: :[[@LINE-7]]:5: note: one of the locations where the pointer's value cannot be null + return 0; + } else { + *p += 20; + return *p; + } +} + +int two_branches_warning(int *p, bool b) { + if (b) { + *p = 42; + } + + if (!b) { + *p = 20; + } + + if (p) { + // CHECK-MESSAGES: :[[@LINE-1]]:7: warning: pointer value is checked even though it cannot be null at this point + // CHECK-MESSAGES: :[[@LINE-9]]:5: note: one of the locations where the pointer's value cannot be null + return 0; + } else { + *p += 20; + return *p; + } +} + +int two_branches_reversed(int *p, bool b) { + if (!b) { + *p = 42; + } + + if (b) { + *p = 20; + } + + if (p) { + // CHECK-MESSAGES: :[[@LINE-1]]:7: warning: pointer value is checked even though it cannot be null at this point + // CHECK-MESSAGES: :[[@LINE-9]]:5: note: one of the locations where the pointer's value cannot be null + return 0; + } else { + *p += 20; + return *p; + } +} + + +int regular_assignment(int *p, int *q) { + *p = 42; + q = p; + + if (q) { + // CHECK-MESSAGES: :[[@LINE-1]]:7: warning: pointer value is checked even though it cannot be null at this point + // CHECK-MESSAGES: :[[@LINE-5]]:3: note: one of the locations where the pointer's value cannot be null + *p += 20; + return *p; + } else { + return 0; + } +} + +int nullptr_assignment(int *nullptr_param, bool b) { + *nullptr_param = 42; + int *nullptr_assigned; + + if (b) { + nullptr_assigned = nullptr; + } else { + nullptr_assigned = nullptr_param; + } + + if (nullptr_assigned) { + // CHECK-MESSAGES-NOT: :[[@LINE-1]]:7: warning: pointer value is checked even though it cannot be null at this point + *nullptr_assigned = 20; + return *nullptr_assigned; + } else { + return 0; + } +} + +extern int *fncall(); +extern void refresh_ref(int *&ptr); +extern void refresh_ptr(int **ptr); + +int fncall_reassignment(int *fncall_reassigned) { + *fncall_reassigned = 42; + + fncall_reassigned = fncall(); + + if (fncall_reassigned) { + // CHECK-MESSAGES-NOT: :[[@LINE-1]]:7: warning: pointer value is checked even though it cannot be null at this point + *fncall_reassigned = 42; + } + + fncall_reassigned = fncall(); + + *fncall_reassigned = 42; + + if (fncall_reassigned) { + // CHECK-MESSAGES: :[[@LINE-1]]:7: warning: pointer value is checked even though it cannot be null at this point + // CHECK-MESSAGES: :[[@LINE-4]]:3: note: one of the locations where the pointer's value cannot be null + *fncall_reassigned = 42; + } + + refresh_ptr(&fncall_reassigned); + + if (fncall_reassigned) { + // CHECK-MESSAGES-NOT: :[[@LINE-1]]:7: warning: pointer value is checked even though it cannot be null at this point + *fncall_reassigned = 42; + } + + refresh_ptr(&fncall_reassigned); + *fncall_reassigned = 42; + + if (fncall_reassigned) { + // CHECK-MESSAGES: :[[@LINE-1]]:7: warning: pointer value is checked even though it cannot be null at this point + // CHECK-MESSAGES: :[[@LINE-4]]:3: note: one of the locations where the pointer's value cannot be null + *fncall_reassigned = 42; + return *fncall_reassigned; + } else { + return 0; + } +} + +int chained_references(int *a, int *b, int *c, int *d, int *e) { + *a = 42; + + if (a) { + // CHECK-MESSAGES: :[[@LINE-1]]:7: warning: pointer value is checked even though it cannot be null at this point + // CHECK-MESSAGES: :[[@LINE-4]]:3: note: one of the locations where the pointer's value cannot be null + *b = 42; + } + + if (b) { + // CHECK-MESSAGES: :[[@LINE-1]]:7: warning: pointer value is checked even though it cannot be null at this point + // CHECK-MESSAGES: :[[@LINE-5]]:5: note: one of the locations where the pointer's value cannot be null + *c = 42; + } + + if (c) { + // CHECK-MESSAGES: :[[@LINE-1]]:7: warning: pointer value is checked even though it cannot be null at this point + // CHECK-MESSAGES: :[[@LINE-5]]:5: note: one of the locations where the pointer's value cannot be null + *d = 42; + } + + if (d) { + // CHECK-MESSAGES: :[[@LINE-1]]:7: warning: pointer value is checked even though it cannot be null at this point + // CHECK-MESSAGES: :[[@LINE-5]]:5: note: one of the locations where the pointer's value cannot be null + *e = 42; + } + + if (e) { + // CHECK-MESSAGES: :[[@LINE-1]]:7: warning: pointer value is checked even though it cannot be null at this point + // CHECK-MESSAGES: :[[@LINE-5]]:5: note: one of the locations where the pointer's value cannot be null + return *a; + } else { + return 0; + } +} + +int chained_if(int *a) { + if (!a) { + return 0; + } + + // FIXME: Negations are not tracked properly when the previous conditional returns + if (a) { + // --CHECK-MESSAGES: :[[@LINE-1]]:7: warning: pointer value is checked even though it cannot be null at this point + *a += 20; + return *a; + } else { + return 0; + } +} + +int double_if(int *a) { + if (a) { + if (a) { + // CHECK-MESSAGES: :[[@LINE-1]]:9: warning: pointer value is checked even though it cannot be null at this point + // --CHECK-MESSAGES: :[[@LINE-3]]:5: note: one of the locations where the pointer's value cannot be null + // FIXME: Add warning for branch statements where pointer is not null afterwards + *a += 20; + return *a; + } else { + return 0; + } + } + + return 0; +} + +int while_loop(int *p, volatile bool *b) { + while (true) { + if (*b) { + *p = 42; + break; + } + } + + if (p) { + // CHECK-MESSAGES: :[[@LINE-1]]:7: warning: pointer value is checked even though it cannot be null at this point + // CHECK-MESSAGES: :[[@LINE-7]]:7: note: one of the locations where the pointer's value cannot be null + *p = 42; + return *p; + } else { + return 0; + } +} + +int ternary_op(int *p, int k) { + *p = 42; + + return p ? *p : k; + // CHECK-MESSAGES: :[[@LINE-1]]:10: warning: pointer value is checked even though it cannot be null at this point + // CHECK-MESSAGES: :[[@LINE-4]]:3: note: one of the locations where the pointer's value cannot be null +} + +// In an earlier version, the check would crash on C++17 structured bindings. +int cxx17_crash(int *p) { + *p = 42; + + int arr[2] = {1, 2}; + auto [a, b] = arr; + + return 0; +} + +void external_by_ref(int *&p); +void external_by_ptr(int **p); + +int external_invalidates() { + int *p = nullptr; + + external_by_ref(p); + + if (p) { + // FIXME: References of a pointer passed to external functions do not invalidate its value + // CHECK-MESSAGES: :[[@LINE-2]]:7: warning: pointer value is checked but it can only be null at this point + return *p; + } + + p = nullptr; + + external_by_ptr(&p); + + if (p) { + // FIXME: References of a pointer passed to external functions do not invalidate its value + // CHECK-MESSAGES: :[[@LINE-2]]:7: warning: pointer value is checked but it can only be null at this point + return *p; + } else { + return 0; + } +} + +int note_tags() { + // FIXME: Note tags are not appended to declarations + int *ptr = nullptr; + + return ptr ? *ptr : 0; + // CHECK-MESSAGES: :[[@LINE-1]]:10: warning: pointer value is checked but it can only be null at this point +} diff --git a/clang/include/clang/Analysis/FlowSensitive/Models/NullPointerAnalysisModel.h b/clang/include/clang/Analysis/FlowSensitive/Models/NullPointerAnalysisModel.h new file mode 100644 index 00000000000000..ae94c06206b035 --- /dev/null +++ b/clang/include/clang/Analysis/FlowSensitive/Models/NullPointerAnalysisModel.h @@ -0,0 +1,112 @@ +//===-- NullPointerAnalysisModel.h ------------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file defines a generic null-pointer analysis model, used for finding +// pointer null-checks after the pointer has already been dereferenced. +// +// Only a limited set of operations are currently recognized. Notably, pointer +// arithmetic, null-pointer assignments and _nullable/_nonnull attributes are +// missing as of yet. +// +//===----------------------------------------------------------------------===// + +#ifndef CLANG_ANALYSIS_FLOWSENSITIVE_MODELS_NULLPOINTERANALYSISMODEL_H +#define CLANG_ANALYSIS_FLOWSENSITIVE_MODELS_NULLPOINTERANALYSISMODEL_H + +#include "clang/AST/ASTContext.h" +#include "clang/AST/Decl.h" +#include "clang/AST/Expr.h" +#include "clang/AST/Stmt.h" +#include "clang/ASTMatchers/ASTMatchFinder.h" +#include "clang/ASTMatchers/ASTMatchers.h" +#include "clang/Analysis/CFG.h" +#include "clang/Analysis/FlowSensitive/CFGMatchSwitch.h" +#include "clang/Analysis/FlowSensitive/DataflowAnalysis.h" +#include "clang/Analysis/FlowSensitive/DataflowEnvironment.h" +#include "clang/Analysis/FlowSensitive/DataflowLattice.h" +#include "clang/Analysis/FlowSensitive/MapLattice.h" +#include "clang/Analysis/FlowSensitive/NoopLattice.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/ADT/Twine.h" + +namespace clang::dataflow { + +class NullPointerAnalysisModel + : public DataflowAnalysis<NullPointerAnalysisModel, NoopLattice> { +public: + /// A transparent wrapper around the function arguments of transferBranch(). + /// Does not outlive the call to transferBranch(). + struct TransferArgs { + bool Branch; + Environment &Env; + }; + +private: + CFGMatchSwitch<Environment> TransferMatchSwitch; + ASTMatchSwitch<Stmt, TransferArgs> BranchTransferMatchSwitch; + +public: + explicit NullPointerAnalysisModel(ASTContext &Context); + + static NoopLattice initialElement() { return {}; } + + static ast_matchers::StatementMatcher ptrValueMatcher(); + + // Used to initialize the storage locations of function arguments. + // Required to merge these values within the environment. + void initializeFunctionParameters(const ControlFlowContext &CFCtx, + Environment &Env); + + void transfer(const CFGElement &E, NoopLattice &State, Environment &Env); + + void transferBranch(bool Branch, const Stmt *E, NoopLattice &State, + Environment &Env); + + void join(QualType Type, const Value &Val1, const Environment &Env1, + const Value &Val2, const Environment &Env2, Value &MergedVal, + Environment &MergedEnv) override; + + ComparisonResult compare(QualType Type, const Value &Val1, + const Environment &Env1, const Value &Val2, + const Environment &Env2) override; + + Value *widen(QualType Type, Value &Prev, const Environment &PrevEnv, + Value &Current, Environment &CurrentEnv) override; +}; + +class NullCheckAfterDereferenceDiagnoser { +public: + struct DiagnoseArgs { + llvm::DenseMap<const Value *, const Expr *> &ValToDerefLoc; + llvm::DenseMap<SourceLocation, const Value *> &WarningLocToVal; + const Environment &Env; + }; + + using ResultType = + std::pair<std::vector<SourceLocation>, std::vector<SourceLocation>>; + + // Maps a pointer's Value to a dereference, null-assignment, etc. + // This is used later to construct the Note tag. + llvm::DenseMap<const Value *, const Expr *> ValToDerefLoc; + // Maps Maps a warning's SourceLocation to its relevant Value. + llvm::DenseMap<SourceLocation, const Value *> WarningLocToVal; + +private: + CFGMatchSwitch<DiagnoseArgs, ResultType> DiagnoseMatchSwitch; + +public: + NullCheckAfterDereferenceDiagnoser(); + + ResultType diagnose(ASTContext &Ctx, const CFGElement *Elt, + const Environment &Env); +}; + +} // namespace clang::dataflow + +#endif // CLANG_ANALYSIS_FLOWSENSITIVE_MODELS_NULLPOINTERANALYSISMODEL_H diff --git a/clang/lib/Analysis/FlowSensitive/Models/CMakeLists.txt b/clang/lib/Analysis/FlowSensitive/Models/CMakeLists.txt index 89bbe8791eb2ca..6d365dabe6ae56 100644 --- a/clang/lib/Analysis/FlowSensitive/Models/CMakeLists.txt +++ b/clang/lib/Analysis/FlowSensitive/Models/CMakeLists.txt @@ -1,5 +1,6 @@ add_clang_library(clangAnalysisFlowSensitiveModels ChromiumCheckModel.cpp + NullPointerAnalysisModel.cpp UncheckedOptionalAccessModel.cpp LINK_LIBS diff --git a/clang/lib/Analysis/FlowSensitive/Models/NullPointerAnalysisModel.cpp b/clang/lib/Analysis/FlowSensitive/Models/NullPointerAnalysisModel.cpp new file mode 100644 index 00000000000000..36c2e7988033ef --- /dev/null +++ b/clang/lib/Analysis/FlowSensitive/Models/NullPointerAnalysisModel.cpp @@ -0,0 +1,629 @@ +//===-- NullPointerAnalysisModel.cpp ----------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file defines a generic null-pointer analysis model, used for finding +// pointer null-checks after the pointer has already been dereferenced. +// +// Only a limited set of operations are currently recognized. Notably, pointer +// arithmetic, null-pointer assignments and _nullable/_nonnull attributes are +// missing as of yet. +// +//===----------------------------------------------------------------------===// + +#include "clang/Analysis/FlowSensitive/Models/NullPointerAnalysisModel.h" +#include "clang/AST/ASTContext.h" +#include "clang/AST/Decl.h" +#include "clang/AST/Expr.h" +#include "clang/AST/Stmt.h" +#include "clang/ASTMatchers/ASTMatchFinder.h" +#include "clang/ASTMatchers/ASTMatchers.h" +#include "clang/Analysis/CFG.h" +#include "clang/Analysis/FlowSensitive/CFGMatchSwitch.h" +#include "clang/Analysis/FlowSensitive/DataflowAnalysis.h" +#include "clang/Analysis/FlowSensitive/DataflowEnvironment.h" +#include "clang/Analysis/FlowSensitive/DataflowLattice.h" +#include "clang/Analysis/FlowSensitive/MapLattice.h" +#include "clang/Analysis/FlowSensitive/NoopLattice.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/ADT/Twine.h" + +namespace clang::dataflow { + +namespace { +using namespace ast_matchers; + +constexpr char kCond[] = "condition"; +constexpr char kVar[] = "var"; +constexpr char kValue[] = "value"; +constexpr char kIsNonnull[] = "is-nonnull"; +constexpr char kIsNull[] = "is-null"; + +enum class SatisfiabilityResult { + // Returned when the value was not initialized yet. + Nullptr, + // Special value that signals that the boolean value can be anything. + // It signals that the underlying formulas are too complex to be calculated + // efficiently. + Top, + // Equivalent to the literal True in the current environment. + True, + // Equivalent to the literal False in the current environment. + False, + // Both True and False values could be produced with an appropriate set of + // conditions. + Unknown +}; + +using SR = SatisfiabilityResult; + +// FIXME: These AST matchers should also be exported via the +// NullPointerAnalysisModel class, for tests +auto ptrToVar(llvm::StringRef VarName = kVar) { + return traverse(TK_IgnoreUnlessSpelledInSource, + declRefExpr(hasType(isAnyPointer())).bind(VarName)); +} + +auto derefMatcher() { + return traverse( + TK_IgnoreUnlessSpelledInSource, + unaryOperator(hasOperatorName("*"), hasUnaryOperand(ptrToVar()))); +} + +auto arrowMatcher() { + return traverse( + TK_IgnoreUnlessSpelledInSource, + memberExpr(allOf(isArrow(), hasObjectExpression(ptrToVar())))); +} + +auto castExprMatcher() { + return castExpr(hasCastKind(CK_PointerToBoolean), + hasSourceExpression(ptrToVar())) + .bind(kCond); +} + +auto nullptrMatcher() { + return castExpr(hasCastKind(CK_NullToPointer)).bind(kVar); +} + +auto addressofMatcher() { + return unaryOperator(hasOperatorName("&")).bind(kVar); +} + +auto functionCallMatcher() { + return callExpr(hasDeclaration(functionDecl(returns(isAnyPointer())))) + .bind(kVar); +} + +auto assignMatcher() { + return binaryOperation(isAssignmentOperator(), hasLHS(ptrToVar()), + hasRHS(expr().bind(kValue))); +} + +auto anyPointerMatcher() { return expr(hasType(isAnyPointer())).bind(kVar); } + +// Only computes simple comparisons against the literals True and False in the +// environment. Faster, but produces many Unknown values. +SatisfiabilityResult shallowComputeSatisfiability(BoolValue *Val, + const Environment &Env) { + if (!Val) + return SR::Nullptr; + + if (isa<TopBoolValue>(Val)) + return SR::Top; + + if (Val == &Env.getBoolLiteralValue(true)) + return SR::True; + + if (Val == &Env.getBoolLiteralValue(false)) + return SR::False; + + return SR::Unknown; +} + +// Computes satisfiability by using the flow condition. Slower, but more +// precise. +SatisfiabilityResult computeSatisfiability(BoolValue *Val, + const Environment &Env) { + // Early return with the fast compute values. + if (SatisfiabilityResult ShallowResult = + shallowComputeSatisfiability(Val, Env); + ShallowResult != SR::Unknown) { + return ShallowResult; + } + + if (Env.proves(Val->formula())) + return SR::True; + + if (Env.proves(Env.arena().makeNot(Val->formula()))) + return SR::False; + + return SR::Unknown; +} + +inline BoolValue &getVal(llvm::StringRef Name, Value &RootValue) { + return *cast<BoolValue>(RootValue.getProperty(Name)); +} + +// Assigns initial pointer null- and nonnull-values to a given Value. +void initializeRootValue(Value &RootValue, Environment &Env) { + Arena &A = Env.arena(); + + BoolValue *IsNull = cast_or_null<BoolValue>(RootValue.getProperty(kIsNull)); + BoolValue *IsNonnull = + cast_or_null<BoolValue>(RootValue.getProperty(kIsNonnull)); + + if (!IsNull) { + IsNull = &A.makeAtomValue(); + RootValue.setProperty(kIsNull, *IsNull); + } + + if (!IsNonnull) { + IsNonnull = &A.makeAtomValue(); + RootValue.setProperty(kIsNonnull, *IsNonnull); + } + + // If the pointer cannot have either a null or nonull value, the state is + // unreachable. + // FIXME: This condition is added in all cases when getValue() is called. + // The reason is that on a post-visit step, the initialized Values are used, + // but the flow condition does not have this constraint yet. + // The framework provides deduplication for constraints, so should not have a + // performance impact. + Env.assume(A.makeOr(IsNull->formula(), IsNonnull->formula())); +} + +void setGLValue(const Expr &E, Value &Val, Environment &Env) { + StorageLocation *SL = Env.getStorageLocation(E); + if (!SL) { + SL = &Env.createStorageLocation(E); + Env.setStorageLocation(E, *SL); + } + + Env.setValue(*SL, Val); +} + +void setUnknownValue(const Expr &E, Value &Val, Environment &Env) { + if (E.isGLValue()) + setGLValue(E, Val, Env); + else if (E.isPRValue()) + Env.setValue(E, Val); + else + llvm_unreachable("all value cases covered"); +} + +Value *getValue(const Expr &Var, Environment &Env) { + if (auto *EnvVal = Env.getValue(Var)) { + // FIXME: The framework usually creates the values for us, but without the + // null-properties. + initializeRootValue(*EnvVal, Env); + + return EnvVal; + } + + auto *RootValue = Env.createValue(Var.getType()); + + if (!RootValue) + return nullptr; + + initializeRootValue(*RootValue, Env); + + setGLValue(Var, *RootValue, Env); + + return RootValue; +} + +void matchDereferenceExpr(const Stmt *stmt, + const MatchFinder::MatchResult &Result, + Environment &Env) { + const auto *Var = Result.Nodes.getNodeAs<Expr>(kVar); + assert(Var != nullptr); + + auto *RootValue = getValue(*Var, Env); + if (!RootValue) { + return; + } + + Env.assume(Env.arena().makeNot(getVal(kIsNull, *RootValue).formula())); +} + +void matchCastExpr(const CastExpr *cond, const MatchFinder::MatchResult &Result, + NullPointerAnalysisModel::TransferArgs &Data) { + auto [IsNonnullBranch, Env] = Data; + + const auto *Var = Result.Nodes.getNodeAs<Expr>(kVar); + assert(Var != nullptr); + + auto *RootValue = getValue(*Var, Env); + if (!RootValue) { + return; + } + + auto *NewRootValue = Env.createValue(Var->getType()); + if (!NewRootValue) + return; + + setGLValue(*Var, *NewRootValue, Env); + + Arena &A = Env.arena(); + BoolValue &IsNonnull = getVal(kIsNonnull, *RootValue); + BoolValue &IsNull = getVal(kIsNull, *RootValue); + + if (IsNonnullBranch) { + Env.assume(A.makeNot(IsNull.formula())); + NewRootValue->setProperty(kIsNull, Env.getBoolLiteralValue(false)); + + NewRootValue->setProperty(kIsNonnull, IsNonnull); + } else { + NewRootValue->setProperty(kIsNull, IsNull); + + Env.assume(A.makeNot(IsNonnull.formula())); + NewRootValue->setProperty(kIsNonnull, Env.getBoolLiteralValue(false)); + } +} + +void matchNullptrExpr(const Expr *expr, const MatchFinder::MatchResult &Result, + Environment &Env) { + const auto *PrVar = Result.Nodes.getNodeAs<Expr>(kVar); + assert(PrVar != nullptr); + + auto *RootValue = Env.getValue(*PrVar); + if (!RootValue) { + RootValue = Env.createValue(PrVar->getType()); + + if (!RootValue) { + return; + } + } + + RootValue->setProperty(kIsNull, Env.getBoolLiteralValue(true)); + RootValue->setProperty(kIsNonnull, Env.getBoolLiteralValue(false)); + Env.setValue(*PrVar, *RootValue); +} + +void matchAddressofExpr(const Expr *expr, + const MatchFinder::MatchResult &Result, + Environment &Env) { + const auto *PrVar = Result.Nodes.getNodeAs<Expr>(kVar); + assert(PrVar != nullptr); + + auto *RootValue = Env.createValue(PrVar->getType()); + if (!RootValue) { + return; + } + + RootValue->setProperty(kIsNull, Env.getBoolLiteralValue(false)); + RootValue->setProperty(kIsNonnull, Env.getBoolLiteralValue(true)); + Env.setValue(*PrVar, *RootValue); +} + +void matchAnyPointerExpr(const Expr *fncall, + const MatchFinder::MatchResult &Result, + Environment &Env) { + // This is not necessarily a prvalue, since operators such as prefix ++ are + // lvalues. + const auto *Var = Result.Nodes.getNodeAs<Expr>(kVar); + assert(Var != nullptr); + + // Initialize to (Unknown, Unknown) + if (Env.getValue(*Var)) + return; + + auto *RootValue = Env.createValue(Var->getType()); + if (!RootValue) + return; + + initializeRootValue(*RootValue, Env); + setUnknownValue(*Var, *RootValue, Env); +} + +NullCheckAfterDereferenceDiagnoser::ResultType +diagnoseDerefLocation(const Expr *Deref, const MatchFinder::MatchResult &Result, + NullCheckAfterDereferenceDiagnoser::DiagnoseArgs &Data) { + auto [ValToDerefLoc, WarningLocToVal, Env] = Data; + + const auto *Var = Result.Nodes.getNodeAs<Expr>(kVar); + assert(Var != nullptr); + + auto *RootValue = Env.getValue(*Var); + if (!RootValue) + return {}; + + // Dereferences are always the highest priority when giving a single location + // FIXME: Do not replace other dereferences, only other Expr's + auto It = ValToDerefLoc.try_emplace(RootValue, nullptr).first; + + It->second = Deref; + + return {}; +} + +NullCheckAfterDereferenceDiagnoser::ResultType +diagnoseAssignLocation(const Expr *Assign, + const MatchFinder::MatchResult &Result, + NullCheckAfterDereferenceDiagnoser::DiagnoseArgs &Data) { + auto [ValToDerefLoc, WarningLocToVal, Env] = Data; + + const auto *RHSVar = Result.Nodes.getNodeAs<Expr>(kValue); + assert (RHSVar != nullptr); + + auto *RHSValue = Env.getValue(*RHSVar); + if (!RHSValue) + return {}; + + auto [It, Inserted] = ValToDerefLoc.try_emplace(RHSValue, nullptr); + + if (Inserted) + It->second = Assign; + + return {}; +} + +NullCheckAfterDereferenceDiagnoser::ResultType +diagnoseCastExpr(const CastExpr *Stmt, const MatchFinder::MatchResult &Result, + const NullCheckAfterDereferenceDiagnoser::DiagnoseArgs &Data) { + + auto [ValToDerefLoc, WarningLocToVal, Env] = Data; + + const auto *Var = Result.Nodes.getNodeAs<Expr>(kVar); + assert(Var != nullptr); + + if (auto *RootValue = Env.getValue(*Var)) { + // FIXME: The framework usually creates the values for us, but without the + // nullability properties. + if (RootValue->getProperty(kIsNull) && RootValue->getProperty(kIsNonnull)) { + bool IsNull = Env.allows(getVal(kIsNull, *RootValue).formula()); + bool IsNonnull = Env.allows(getVal(kIsNonnull, *RootValue).formula()); + + if (!IsNull && IsNonnull) { + bool Inserted = + WarningLocToVal.try_emplace(Var->getBeginLoc(), RootValue).second; + assert(Inserted && "multiple warnings at the same source location"); + + return {{}, {Var->getBeginLoc()}}; + } + + if (IsNull && !IsNonnull) { + bool Inserted = + WarningLocToVal.try_emplace(Var->getBeginLoc(), RootValue).second; + assert(Inserted && "multiple warnings at the same source location"); + + return {{Var->getBeginLoc()}, {}}; + } + } + + // If no matches are found, the cast itself signals a special location + auto [It, Inserted] = ValToDerefLoc.try_emplace(RootValue, nullptr); + + if (Inserted) + It->second = Stmt; + } + + return {}; +} + +auto buildTransferMatchSwitch() { + return CFGMatchSwitchBuilder<Environment>() + .CaseOfCFGStmt<Stmt>(derefMatcher(), matchDereferenceExpr) + .CaseOfCFGStmt<Stmt>(arrowMatcher(), matchDereferenceExpr) + .CaseOfCFGStmt<Expr>(nullptrMatcher(), matchNullptrExpr) + .CaseOfCFGStmt<Expr>(addressofMatcher(), matchAddressofExpr) + .CaseOfCFGStmt<Expr>(functionCallMatcher(), matchAnyPointerExpr) + .CaseOfCFGStmt<Expr>(anyPointerMatcher(), matchAnyPointerExpr) + .Build(); +} + +auto buildBranchTransferMatchSwitch() { + return ASTMatchSwitchBuilder<Stmt, + NullPointerAnalysisModel::TransferArgs>() + .CaseOf<CastExpr>(castExprMatcher(), matchCastExpr) + .Build(); +} + +auto buildDiagnoseMatchSwitch() { + return CFGMatchSwitchBuilder<NullCheckAfterDereferenceDiagnoser::DiagnoseArgs, + NullCheckAfterDereferenceDiagnoser::ResultType>() + .CaseOfCFGStmt<Expr>(derefMatcher(), diagnoseDerefLocation) + .CaseOfCFGStmt<Expr>(arrowMatcher(), diagnoseDerefLocation) + .CaseOfCFGStmt<Expr>(assignMatcher(), diagnoseAssignLocation) + .CaseOfCFGStmt<CastExpr>(castExprMatcher(), diagnoseCastExpr) + .Build(); +} + +} // namespace + +NullPointerAnalysisModel::NullPointerAnalysisModel( + ASTContext &Context) + : DataflowAnalysis<NullPointerAnalysisModel, NoopLattice>(Context), + TransferMatchSwitch(buildTransferMatchSwitch()), + BranchTransferMatchSwitch(buildBranchTransferMatchSwitch()) {} + +ast_matchers::StatementMatcher +NullPointerAnalysisModel::ptrValueMatcher() { + return ptrToVar(); +} + +void NullPointerAnalysisModel::transfer(const CFGElement &E, + NoopLattice &State, + Environment &Env) { + TransferMatchSwitch(E, getASTContext(), Env); +} + +void NullPointerAnalysisModel::transferBranch(bool Branch, const Stmt *E, + NoopLattice &State, + Environment &Env) { + if (!E) + return; + + TransferArgs Args = {Branch, Env}; + BranchTransferMatchSwitch(*E, getASTContext(), Args); +} + +void NullPointerAnalysisModel::join(QualType Type, const Value &Val1, + const Environment &Env1, + const Value &Val2, + const Environment &Env2, + Value &MergedVal, + Environment &MergedEnv) { + if (!Type->isAnyPointerType()) + return; + + const auto MergeValues = [&](llvm::StringRef Name) -> BoolValue & { + BoolValue *LHSVar = cast_or_null<BoolValue>(Val1.getProperty(Name)); + BoolValue *RHSVar = cast_or_null<BoolValue>(Val2.getProperty(Name)); + + SatisfiabilityResult LHSResult = computeSatisfiability(LHSVar, Env1); + SatisfiabilityResult RHSResult = computeSatisfiability(RHSVar, Env2); + + // Handle special cases. + if (LHSResult == SR::Top || RHSResult == SR::Top) { + return MergedEnv.makeTopBoolValue(); + } else if (LHSResult == RHSResult) { + switch (LHSResult) { + case SR::Nullptr: + return MergedEnv.makeAtomicBoolValue(); + case SR::Top: + return *LHSVar; + case SR::True: + return MergedEnv.getBoolLiteralValue(true); + case SR::False: + return MergedEnv.getBoolLiteralValue(false); + case SR::Unknown: + if (MergedEnv.proves(MergedEnv.arena().makeEquals(LHSVar->formula(), + RHSVar->formula()))) + return *LHSVar; + + return MergedEnv.makeTopBoolValue(); + } + } + + return MergedEnv.makeTopBoolValue(); + }; + + BoolValue &NonnullValue = MergeValues(kIsNonnull); + BoolValue &NullValue = MergeValues(kIsNull); + + MergedVal.setProperty(kIsNonnull, NonnullValue); + MergedVal.setProperty(kIsNull, NullValue); + + MergedEnv.assume(MergedEnv.makeOr(NonnullValue, NullValue).formula()); +} + +ComparisonResult NullPointerAnalysisModel::compare( + QualType Type, const Value &Val1, const Environment &Env1, + const Value &Val2, const Environment &Env2) { + + if (!Type->isAnyPointerType()) + return ComparisonResult::Unknown; + + // Evaluate values, but different values compare to Unknown. + auto CompareValues = [&](llvm::StringRef Name) -> ComparisonResult { + BoolValue *LHSVar = cast_or_null<BoolValue>(Val1.getProperty(Name)); + BoolValue *RHSVar = cast_or_null<BoolValue>(Val2.getProperty(Name)); + + SatisfiabilityResult LHSResult = computeSatisfiability(LHSVar, Env1); + SatisfiabilityResult RHSResult = computeSatisfiability(RHSVar, Env2); + + if (LHSResult == SR::Top || RHSResult == SR::Top) + return ComparisonResult::Same; + + if (LHSResult == SR::Unknown || RHSResult == SR::Unknown) + return ComparisonResult::Unknown; + + if (LHSResult == RHSResult) + return ComparisonResult::Same; + + return ComparisonResult::Different; + }; + + ComparisonResult NullComparison = CompareValues(kIsNull); + ComparisonResult NonnullComparison = CompareValues(kIsNonnull); + + if (NullComparison == ComparisonResult::Different || + NonnullComparison == ComparisonResult::Different) + return ComparisonResult::Different; + + if (NullComparison == ComparisonResult::Unknown || + NonnullComparison == ComparisonResult::Unknown) + return ComparisonResult::Unknown; + + return ComparisonResult::Same; +} + +// Different in that it replaces differing boolean values with Top. +ComparisonResult compareAndReplace(QualType Type, Value &Val1, + const Environment &Env1, Value &Val2, + Environment &Env2) { + + if (!Type->isAnyPointerType()) + return ComparisonResult::Unknown; + + auto FastCompareValues = [&](llvm::StringRef Name) -> ComparisonResult { + BoolValue *LHSVar = cast_or_null<BoolValue>(Val1.getProperty(Name)); + BoolValue *RHSVar = cast_or_null<BoolValue>(Val2.getProperty(Name)); + + SatisfiabilityResult LHSResult = shallowComputeSatisfiability(LHSVar, Env1); + SatisfiabilityResult RHSResult = shallowComputeSatisfiability(RHSVar, Env2); + + if (LHSResult == SR::Top || RHSResult == SR::Top) { + Val2.setProperty(Name, Env2.makeTopBoolValue()); + return ComparisonResult::Same; + } + + if (LHSResult == SR::Unknown || RHSResult == SR::Unknown) + return ComparisonResult::Unknown; + + if (LHSResult == RHSResult) + return ComparisonResult::Same; + + Val2.setProperty(Name, Env2.makeTopBoolValue()); + return ComparisonResult::Different; + }; + + ComparisonResult NullComparison = FastCompareValues(kIsNull); + ComparisonResult NonnullComparison = FastCompareValues(kIsNonnull); + + if (NullComparison == ComparisonResult::Different || + NonnullComparison == ComparisonResult::Different) + return ComparisonResult::Different; + + if (NullComparison == ComparisonResult::Unknown || + NonnullComparison == ComparisonResult::Unknown) + return ComparisonResult::Unknown; + + return ComparisonResult::Same; +} + +Value *NullPointerAnalysisModel::widen(QualType Type, Value &Prev, + const Environment &PrevEnv, + Value &Current, + Environment &CurrentEnv) { + if (!Type->isAnyPointerType()) + return nullptr; + + switch (compareAndReplace(Type, Prev, PrevEnv, Current, CurrentEnv)) { + case ComparisonResult::Same: + return &Prev; + case ComparisonResult::Unknown: + return nullptr; + case ComparisonResult::Different: + return &Current; + } +} + +NullCheckAfterDereferenceDiagnoser::NullCheckAfterDereferenceDiagnoser() + : DiagnoseMatchSwitch(buildDiagnoseMatchSwitch()) {} + +NullCheckAfterDereferenceDiagnoser::ResultType +NullCheckAfterDereferenceDiagnoser::diagnose(ASTContext &Ctx, + const CFGElement *Elt, + const Environment &Env) { + DiagnoseArgs Args = {ValToDerefLoc, WarningLocToVal, Env}; + return DiagnoseMatchSwitch(*Elt, Ctx, Args); +} + +} // namespace clang::dataflow diff --git a/clang/unittests/Analysis/FlowSensitive/CMakeLists.txt b/clang/unittests/Analysis/FlowSensitive/CMakeLists.txt index 94160d949637cf..2a301a9c48fd25 100644 --- a/clang/unittests/Analysis/FlowSensitive/CMakeLists.txt +++ b/clang/unittests/Analysis/FlowSensitive/CMakeLists.txt @@ -15,6 +15,7 @@ add_clang_unittest(ClangAnalysisFlowSensitiveTests MapLatticeTest.cpp MatchSwitchTest.cpp MultiVarConstantPropagationTest.cpp + NullPointerAnalysisModelTest.cpp RecordOpsTest.cpp SignAnalysisTest.cpp SimplifyConstraintsTest.cpp diff --git a/clang/unittests/Analysis/FlowSensitive/NullPointerAnalysisModelTest.cpp b/clang/unittests/Analysis/FlowSensitive/NullPointerAnalysisModelTest.cpp new file mode 100644 index 00000000000000..18ab99e8c20cd7 --- /dev/null +++ b/clang/unittests/Analysis/FlowSensitive/NullPointerAnalysisModelTest.cpp @@ -0,0 +1,336 @@ +//===- NullPointerAnalysisModelTest.cpp -------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file defines a test for pointer nullability, specifically focused on +// finding invalid dereferences, and unnecessary null-checks. +// Only a limited set of operations are currently recognized. Notably, pointer +// arithmetic, null-pointer assignments and _nullable/_nonnull attributes are +// missing as of yet. +// +// FIXME: Port over to the new type of dataflow test infrastructure +// +//===----------------------------------------------------------------------===// + +#include "clang/Analysis/FlowSensitive/Models/NullPointerAnalysisModel.h" +#include "clang/AST/ASTContext.h" +#include "clang/AST/Decl.h" +#include "clang/AST/Expr.h" +#include "clang/AST/Stmt.h" +#include "clang/ASTMatchers/ASTMatchFinder.h" +#include "clang/ASTMatchers/ASTMatchers.h" +#include "clang/Analysis/CFG.h" +#include "clang/Analysis/FlowSensitive/CFGMatchSwitch.h" +#include "clang/Analysis/FlowSensitive/DataflowAnalysis.h" +#include "clang/Analysis/FlowSensitive/DataflowEnvironment.h" +#include "clang/Analysis/FlowSensitive/DataflowLattice.h" +#include "clang/Analysis/FlowSensitive/MapLattice.h" +#include "clang/Analysis/FlowSensitive/NoopLattice.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/ADT/Twine.h" +#include "llvm/Support/Error.h" +#include "llvm/Testing/ADT/StringMapEntry.h" +#include "llvm/Testing/Support/Error.h" +#include "TestingSupport.h" +#include "gmock/gmock.h" +#include "gtest/gtest.h" +#include <cstdint> +#include <memory> +#include <ostream> +#include <string> +#include <utility> + +namespace clang::dataflow { +namespace { +using namespace ast_matchers; + +constexpr char kVar[] = "var"; +// constexpr char kKnown[] = "is-known"; +constexpr char kIsNonnull[] = "is-nonnull"; +constexpr char kIsNull[] = "is-null"; + +constexpr char kBoolTrue[] = "true"; +constexpr char kBoolFalse[] = "false"; +constexpr char kBoolInvalid[] = "invalid"; +constexpr char kBoolUnknown[] = "unknown"; +constexpr char kBoolNullptr[] = "is-nullptr"; + +std::string checkNullabilityState(BoolValue *value, const Environment &Env) { + if (value == nullptr) { + return std::string(kBoolNullptr); + } else { + int boolState = 0; + if (Env.proves(value->formula())) { + boolState |= 1; + } + if (Env.proves(Env.makeNot(*value).formula())) { + boolState |= 2; + } + switch (boolState) { + case 0: + return kBoolUnknown; + case 1: + return kBoolTrue; + case 2: + return kBoolFalse; + // If both the condition and its negation are satisfied, the program point + // is proven to be impossible. + case 3: + return kBoolInvalid; + default: + llvm_unreachable("all cases covered in switch"); + } + } +} + +// We are binding to the address of the Decl here, as the Expr has a different +// address than the one stored in the framework. +auto nameToVar(llvm::StringRef name) { + return declRefExpr(hasType(isAnyPointer()), + hasDeclaration(namedDecl(hasName(name)).bind(kVar))) + ; +} + +using ::clang::dataflow::test::AnalysisInputs; +using ::clang::dataflow::test::AnalysisOutputs; +using ::clang::dataflow::test::checkDataflow; +using ::llvm::IsStringMapEntry; +using ::testing::Args; +using ::testing::Pair; +using ::testing::UnorderedElementsAre; + +MATCHER_P2(HasNullabilityState, null, nonnull, + std::string("has nullability state where isNull is ") + null + + " and isNonnull is " + nonnull) { + return checkNullabilityState( + cast_or_null<BoolValue>(arg.first->getProperty(kIsNonnull)), + *arg.second) == nonnull && + checkNullabilityState( + cast_or_null<BoolValue>(arg.first->getProperty(kIsNull)), + *arg.second) == null; +} + +MATCHER_P3(HoldsVariable, name, output, checks, + ((negation ? "doesn't hold" : "holds") + + llvm::StringRef(" a variable in its environment that ") + + ::testing::DescribeMatcher<std::pair<Value *, Environment *>>( + checks, negation)) + .str()) { + auto MatchResults = match(functionDecl(hasDescendant(nameToVar(name))), + *output.Target, output.ASTCtx); + assert(!MatchResults.empty()); + + const auto *pointerExpr = MatchResults[0].template getNodeAs<ValueDecl>(kVar); + assert(pointerExpr != nullptr); + + const auto *ExprValue = arg.Env.getValue(*pointerExpr); + + if (ExprValue == nullptr) { + return false; + } + + return ExplainMatchResult(checks, std::pair{ExprValue, &arg.Env}, + result_listener); +} + +template <typename MatcherFactory> +void ExpectDataflowResult(llvm::StringRef Code, MatcherFactory Expectations) { + ASSERT_THAT_ERROR( + checkDataflow<NullPointerAnalysisModel>( + AnalysisInputs<NullPointerAnalysisModel>( + Code, hasName("fun"), + [](ASTContext &C, Environment &Env) { + return NullPointerAnalysisModel(C); + }) + .withASTBuildArgs({"-fsyntax-only", "-std=c++17"}), + /*VerifyResults=*/[&Expectations]( + const llvm::StringMap<DataflowAnalysisState< + NullPointerAnalysisModel::Lattice>> &Results, + const AnalysisOutputs &Output) { + EXPECT_THAT(Results, Expectations(Output)); + }), + llvm::Succeeded()); +} + +TEST(NullCheckAfterDereferenceTest, DereferenceTypes) { + std::string Code = R"( + struct S { + int a; + }; + + void fun(int *p, S *q) { + *p = 0; // [[p]] + + q->a = 20; // [[q]] + } + )"; + ExpectDataflowResult(Code, [](const AnalysisOutputs &Output) -> auto { + return UnorderedElementsAre( + IsStringMapEntry( + "p", HoldsVariable("p", Output, + HasNullabilityState(kBoolFalse, kBoolTrue))), + IsStringMapEntry( + "q", HoldsVariable("q", Output, + HasNullabilityState(kBoolFalse, kBoolTrue)))); + }); +} + +TEST(NullCheckAfterDereferenceTest, ConditionalTypes) { + std::string Code = R"( + void fun(int *p) { + if (p) { + (void)0; // [[p_true]] + } else { + (void)0; // [[p_false]] + } + + // FIXME: Test ternary op + } + )"; + ExpectDataflowResult(Code, [](const AnalysisOutputs &Output) -> auto { + return UnorderedElementsAre( + IsStringMapEntry("p_true", HoldsVariable("p", Output, + HasNullabilityState( + kBoolFalse, kBoolTrue))), + IsStringMapEntry("p_false", HoldsVariable("p", Output, + HasNullabilityState( + kBoolTrue, kBoolFalse)))); + }); +} + +TEST(NullCheckAfterDereferenceTest, UnrelatedCondition) { + std::string Code = R"( + void fun(int *p, bool b) { + if (b) { + *p = 42; + (void)0; // [[p_b_true]] + } else { + (void)0; // [[p_b_false]] + } + + (void)0; // [[p_merged]] + + if (b) { + (void)0; // [[b_true]] + + if (p) { + (void)0; // [[b_p_true]] + } else { + (void)0; // [[b_p_false]] + } + } + } + )"; + ExpectDataflowResult(Code, [](const AnalysisOutputs &Output) -> auto { + return UnorderedElementsAre( + IsStringMapEntry("p_b_true", HoldsVariable("p", Output, + HasNullabilityState( + kBoolFalse, kBoolTrue))), + IsStringMapEntry( + "p_b_false", + HoldsVariable("p", Output, + HasNullabilityState(kBoolUnknown, kBoolUnknown))), + IsStringMapEntry( + "p_merged", + HoldsVariable("p", Output, + HasNullabilityState(kBoolUnknown, kBoolUnknown))), + IsStringMapEntry("b_true", HoldsVariable("p", Output, + HasNullabilityState( + kBoolFalse, kBoolTrue))), + IsStringMapEntry("b_p_true", HoldsVariable("p", Output, + HasNullabilityState( + kBoolFalse, + kBoolTrue))), + // FIXME: Flow condition is false in this last entry, + // should test that instead of an invalid state + IsStringMapEntry( + "b_p_false", + HoldsVariable("p", Output, + HasNullabilityState(kBoolInvalid, kBoolInvalid)))); + }); +} + +TEST(NullCheckAfterDereferenceTest, AssignmentOfCommonValues) { + std::string Code = R"( + using size_t = decltype(sizeof(void*)); + extern void *malloc(size_t); + extern int *ext(); + + void fun() { + int *p = (int*)malloc(sizeof(int)); + (void)0; // [[p_malloc]] + + if (p) { + *p = 42; // [[p_true]] + } else { + (void)0; // [[p_false]] + } + + (void)0; // [[p_merge]] + + p = nullptr; // [[p_nullptr]] + + p = ext(); // [[p_extern]] + } + )"; + ExpectDataflowResult(Code, [](const AnalysisOutputs &Output) -> auto { + return UnorderedElementsAre( + // FIXME: Recognize that malloc (and other functions) are nullable + IsStringMapEntry( + "p_malloc", + HoldsVariable("p", Output, + HasNullabilityState(kBoolUnknown, kBoolUnknown))), + IsStringMapEntry( + "p_true", + HoldsVariable("p", Output, + HasNullabilityState(kBoolFalse, kBoolTrue))), + IsStringMapEntry( + "p_false", + HoldsVariable("p", Output, + HasNullabilityState(kBoolTrue, kBoolFalse))), + IsStringMapEntry( + "p_merge", + HoldsVariable("p", Output, + HasNullabilityState(kBoolUnknown, kBoolUnknown))), + IsStringMapEntry( + "p_nullptr", + HoldsVariable("p", Output, + HasNullabilityState(kBoolTrue, kBoolFalse))), + IsStringMapEntry( + "p_extern", + HoldsVariable("p", Output, + HasNullabilityState(kBoolUnknown, kBoolUnknown)))); + }); +} + +TEST(NullCheckAfterDereferenceTest, MergeValues) { + std::string Code = R"( + using size_t = decltype(sizeof(void*)); + extern void *malloc(size_t); + + void fun(int *p, bool b) { + if (p) { + *p = 10; + } else { + p = (int*)malloc(sizeof(int)); + } + + (void)0; // [[p_merge]] + } + )"; + ExpectDataflowResult(Code, [](const AnalysisOutputs &Output) -> auto { + return UnorderedElementsAre(IsStringMapEntry( + "p_merge", + // Even if a pointer was nonnull on a branch, it is worth keeping the + // more complex formula for more precise analysis. + HoldsVariable("p", Output, + HasNullabilityState(kBoolUnknown, kBoolUnknown)))); + }); +} + +} // namespace +} // namespace clang::dataflow _______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits