=?utf-8?q?Donát?= Nagy <donat.n...@ericsson.com>, =?utf-8?q?Donát?= Nagy <donat.n...@ericsson.com>, =?utf-8?q?Donát?= Nagy <donat.n...@ericsson.com> Message-ID: In-Reply-To: <llvm/llvm-project/pull/67572/cl...@github.com>
https://github.com/DonatNagyE updated https://github.com/llvm/llvm-project/pull/67572 >From 7bd280e2da3f2ee8fe5fd7086a4704331f21b435 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Don=C3=A1t=20Nagy?= <donat.n...@ericsson.com> Date: Wed, 6 Sep 2023 13:39:27 +0200 Subject: [PATCH 1/4] [analyzer][NFC] Simplifications in ArrayBoundV2 I'm planning to improve diagnostics generation in `ArrayBoundCheckerV2` but before that I'm refactoring the source code to clean up some over-complicated code and an inaccurate comment. Changes in this commit: - Remove the `mutable std::unique_ptr<BugType>` boilerplate, because it's no longer needed. - Remove the code duplication between the methods `reportOOB()` and `reportTaintedOOB()`. - Eliminate the class `RegionRawOffsetV2` because it's just a "reinvent the wheel" version of `std::pair` and it was used only once, as a temporary object that was immediately decomposed. (I suspect that `RegionRawOffset` in MemRegion.cpp could also be eliminated.) - Flatten the code of `computeOffset()` which had contained six nested indentation levels before this commit. - Ensure that `computeOffset()` returns `std::nullopt` instead of a `{Region, <zero array index>}` pair in the case when it encounters a `Location` that is not an `ElementRegion`. This ensures that the `checkLocation` callback returns early when it handles a memory access where it has "nothing to do" (no subscript operation or equivalent pointer arithmetic). (Note that this is still NFC because zero is a valid index everywhere, so the old logic without this shortcut eventually reached the same conclusion.) - Correct a wrong explanation comment in `getSimplifiedOffsets()`. --- .../Checkers/ArrayBoundCheckerV2.cpp | 221 +++++++----------- 1 file changed, 86 insertions(+), 135 deletions(-) diff --git a/clang/lib/StaticAnalyzer/Checkers/ArrayBoundCheckerV2.cpp b/clang/lib/StaticAnalyzer/Checkers/ArrayBoundCheckerV2.cpp index db4a2fcea9b2cdd..c2ffcdf5e306d1f 100644 --- a/clang/lib/StaticAnalyzer/Checkers/ArrayBoundCheckerV2.cpp +++ b/clang/lib/StaticAnalyzer/Checkers/ArrayBoundCheckerV2.cpp @@ -32,15 +32,13 @@ using namespace taint; namespace { class ArrayBoundCheckerV2 : public Checker<check::Location> { - mutable std::unique_ptr<BugType> BT; - mutable std::unique_ptr<BugType> TaintBT; + BugType BT{this, "Out-of-bound access"}; + BugType TaintBT{this, "Out-of-bound access", categories::TaintedData}; - enum OOB_Kind { OOB_Precedes, OOB_Excedes }; + enum OOB_Kind { OOB_Precedes, OOB_Exceeds, OOB_Taint }; - void reportOOB(CheckerContext &C, ProgramStateRef errorState, - OOB_Kind kind) const; - void reportTaintOOB(CheckerContext &C, ProgramStateRef errorState, - SVal TaintedSVal) const; + void reportOOB(CheckerContext &C, ProgramStateRef ErrorState, OOB_Kind Kind, + SVal TaintedSVal = UnknownVal()) const; static bool isFromCtypeMacro(const Stmt *S, ASTContext &AC); @@ -48,26 +46,58 @@ class ArrayBoundCheckerV2 : void checkLocation(SVal l, bool isLoad, const Stmt *S, CheckerContext &C) const; }; +} // anonymous namespace -// FIXME: Eventually replace RegionRawOffset with this class. -class RegionRawOffsetV2 { -private: - const SubRegion *baseRegion; - NonLoc byteOffset; +/// For a given Location that can be represented as a symbolic expression +/// Arr[Idx] (or perhaps Arr[Idx1][Idx2] etc.), return the parent memory block +/// Arr and the distance of Location from the beginning of Arr (expressed in a +/// NonLoc that specifies the number of CharUnits). Returns nullopt when these +/// cannot be determined. +std::optional<std::pair<const SubRegion *, NonLoc>> +computeOffset(ProgramStateRef State, SValBuilder &SVB, SVal Location) { + QualType T = SVB.getArrayIndexType(); + auto Calc = [&SVB, State, T](BinaryOperatorKind Op, NonLoc LHS, NonLoc RHS) { + // We will use this utility to add and multiply values. + return SVB.evalBinOpNN(State, Op, LHS, RHS, T).getAs<NonLoc>(); + }; -public: - RegionRawOffsetV2(const SubRegion *base, NonLoc offset) - : baseRegion(base), byteOffset(offset) { assert(base); } + const auto *Region = dyn_cast_or_null<SubRegion>(Location.getAsRegion()); + std::optional<NonLoc> Offset = std::nullopt; + + while (const auto *ERegion = dyn_cast_or_null<ElementRegion>(Region)) { + const auto Index = ERegion->getIndex().getAs<NonLoc>(); + if (!Index) + return std::nullopt; + + QualType ElemType = ERegion->getElementType(); + // If the element is an incomplete type, go no further. + if (ElemType->isIncompleteType()) + return std::nullopt; + + // Calculate Delta = Index * sizeof(ElemType). + NonLoc Size = SVB.makeArrayIndex( + SVB.getContext().getTypeSizeInChars(ElemType).getQuantity()); + auto Delta = Calc(BO_Mul, *Index, Size); + if (!Delta) + return std::nullopt; + + // Perform Offset += Delta, handling the initial nullopt as 0. + if (!Offset) { + Offset = Delta; + } else { + Offset = Calc(BO_Add, *Offset, *Delta); + if (!Offset) + return std::nullopt; + } - NonLoc getByteOffset() const { return byteOffset; } - const SubRegion *getRegion() const { return baseRegion; } + // Continute the offset calculations with the SuperRegion. + Region = ERegion->getSuperRegion()->getAs<SubRegion>(); + } - static std::optional<RegionRawOffsetV2> - computeOffset(ProgramStateRef State, SValBuilder &SVB, SVal Location); + if (Region && Offset) + return std::make_pair(Region, *Offset); - void dump() const; - void dumpToStream(raw_ostream &os) const; -}; + return std::nullopt; } // TODO: once the constraint manager is smart enough to handle non simplified @@ -86,8 +116,8 @@ getSimplifiedOffsets(NonLoc offset, nonloc::ConcreteInt extent, APSIntType(extent.getValue()).convert(SIE->getRHS()); switch (SIE->getOpcode()) { case BO_Mul: - // The constant should never be 0 here, since it the result of scaling - // based on the size of a type which is never 0. + // The constant should never be 0 here, becasue multiplication by zero + // is simplified by the engine. if ((extent.getValue() % constant) != 0) return std::pair<NonLoc, nonloc::ConcreteInt>(offset, extent); else @@ -163,16 +193,15 @@ void ArrayBoundCheckerV2::checkLocation(SVal location, bool isLoad, return; ProgramStateRef state = checkerContext.getState(); - SValBuilder &svalBuilder = checkerContext.getSValBuilder(); - const std::optional<RegionRawOffsetV2> &RawOffset = - RegionRawOffsetV2::computeOffset(state, svalBuilder, location); + + const std::optional<std::pair<const SubRegion *, NonLoc>> &RawOffset = + computeOffset(state, svalBuilder, location); if (!RawOffset) return; - NonLoc ByteOffset = RawOffset->getByteOffset(); - const SubRegion *Reg = RawOffset->getRegion(); + auto [Reg, ByteOffset] = *RawOffset; // CHECK LOWER BOUND const MemSpaceRegion *Space = Reg->getMemorySpace(); @@ -207,12 +236,13 @@ void ArrayBoundCheckerV2::checkLocation(SVal location, bool isLoad, if (state_exceedsUpperBound) { if (!state_withinUpperBound) { // We know that the index definitely exceeds the upper bound. - reportOOB(checkerContext, state_exceedsUpperBound, OOB_Excedes); + reportOOB(checkerContext, state_exceedsUpperBound, OOB_Exceeds); return; } if (isTainted(state, ByteOffset)) { // Both cases are possible, but the index is tainted, so report. - reportTaintOOB(checkerContext, state_exceedsUpperBound, ByteOffset); + reportOOB(checkerContext, state_exceedsUpperBound, OOB_Taint, + ByteOffset); return; } } @@ -224,58 +254,40 @@ void ArrayBoundCheckerV2::checkLocation(SVal location, bool isLoad, checkerContext.addTransition(state); } -void ArrayBoundCheckerV2::reportTaintOOB(CheckerContext &checkerContext, - ProgramStateRef errorState, - SVal TaintedSVal) const { - ExplodedNode *errorNode = checkerContext.generateErrorNode(errorState); - if (!errorNode) - return; - - if (!TaintBT) - TaintBT.reset( - new BugType(this, "Out-of-bound access", categories::TaintedData)); - - SmallString<256> buf; - llvm::raw_svector_ostream os(buf); - os << "Out of bound memory access (index is tainted)"; - auto BR = - std::make_unique<PathSensitiveBugReport>(*TaintBT, os.str(), errorNode); - - // Track back the propagation of taintedness. - for (SymbolRef Sym : getTaintedSymbols(errorState, TaintedSVal)) { - BR->markInteresting(Sym); - } - - checkerContext.emitReport(std::move(BR)); -} - -void ArrayBoundCheckerV2::reportOOB(CheckerContext &checkerContext, - ProgramStateRef errorState, - OOB_Kind kind) const { +void ArrayBoundCheckerV2::reportOOB(CheckerContext &C, + ProgramStateRef ErrorState, OOB_Kind Kind, + SVal TaintedSVal) const { - ExplodedNode *errorNode = checkerContext.generateErrorNode(errorState); - if (!errorNode) + ExplodedNode *ErrorNode = C.generateErrorNode(ErrorState); + if (!ErrorNode) return; - if (!BT) - BT.reset(new BugType(this, "Out-of-bound access")); + // FIXME: These diagnostics are preliminary, and they'll be replaced soon by + // a follow-up commit. - // FIXME: This diagnostics are preliminary. We should get far better - // diagnostics for explaining buffer overruns. + SmallString<128> Buf; + llvm::raw_svector_ostream Out(Buf); + Out << "Out of bound memory access "; - SmallString<256> buf; - llvm::raw_svector_ostream os(buf); - os << "Out of bound memory access "; - switch (kind) { + switch (Kind) { case OOB_Precedes: - os << "(accessed memory precedes memory block)"; + Out << "(accessed memory precedes memory block)"; + break; + case OOB_Exceeds: + Out << "(access exceeds upper limit of memory block)"; break; - case OOB_Excedes: - os << "(access exceeds upper limit of memory block)"; + case OOB_Taint: + Out << "(index is tainted)"; break; } - auto BR = std::make_unique<PathSensitiveBugReport>(*BT, os.str(), errorNode); - checkerContext.emitReport(std::move(BR)); + auto BR = std::make_unique<PathSensitiveBugReport>( + Kind == OOB_Taint ? TaintBT : BT, Out.str(), ErrorNode); + // Track back the propagation of taintedness, or do nothing if TaintedSVal is + // the default UnknownVal(). + for (SymbolRef Sym : getTaintedSymbols(ErrorState, TaintedSVal)) { + BR->markInteresting(Sym); + } + C.emitReport(std::move(BR)); } bool ArrayBoundCheckerV2::isFromCtypeMacro(const Stmt *S, ASTContext &ACtx) { @@ -297,67 +309,6 @@ bool ArrayBoundCheckerV2::isFromCtypeMacro(const Stmt *S, ASTContext &ACtx) { (MacroName == "isupper") || (MacroName == "isxdigit")); } -#ifndef NDEBUG -LLVM_DUMP_METHOD void RegionRawOffsetV2::dump() const { - dumpToStream(llvm::errs()); -} - -void RegionRawOffsetV2::dumpToStream(raw_ostream &os) const { - os << "raw_offset_v2{" << getRegion() << ',' << getByteOffset() << '}'; -} -#endif - -/// For a given Location that can be represented as a symbolic expression -/// Arr[Idx] (or perhaps Arr[Idx1][Idx2] etc.), return the parent memory block -/// Arr and the distance of Location from the beginning of Arr (expressed in a -/// NonLoc that specifies the number of CharUnits). Returns nullopt when these -/// cannot be determined. -std::optional<RegionRawOffsetV2> -RegionRawOffsetV2::computeOffset(ProgramStateRef State, SValBuilder &SVB, - SVal Location) { - QualType T = SVB.getArrayIndexType(); - auto Calc = [&SVB, State, T](BinaryOperatorKind Op, NonLoc LHS, NonLoc RHS) { - // We will use this utility to add and multiply values. - return SVB.evalBinOpNN(State, Op, LHS, RHS, T).getAs<NonLoc>(); - }; - - const MemRegion *Region = Location.getAsRegion(); - NonLoc Offset = SVB.makeZeroArrayIndex(); - - while (Region) { - if (const auto *ERegion = dyn_cast<ElementRegion>(Region)) { - if (const auto Index = ERegion->getIndex().getAs<NonLoc>()) { - QualType ElemType = ERegion->getElementType(); - // If the element is an incomplete type, go no further. - if (ElemType->isIncompleteType()) - return std::nullopt; - - // Perform Offset += Index * sizeof(ElemType); then continue the offset - // calculations with SuperRegion: - NonLoc Size = SVB.makeArrayIndex( - SVB.getContext().getTypeSizeInChars(ElemType).getQuantity()); - if (auto Delta = Calc(BO_Mul, *Index, Size)) { - if (auto NewOffset = Calc(BO_Add, Offset, *Delta)) { - Offset = *NewOffset; - Region = ERegion->getSuperRegion(); - continue; - } - } - } - } else if (const auto *SRegion = dyn_cast<SubRegion>(Region)) { - // NOTE: The dyn_cast<>() is expected to succeed, it'd be very surprising - // to see a MemSpaceRegion at this point. - // FIXME: We may return with {<Region>, 0} even if we didn't handle any - // ElementRegion layers. I think that this behavior was introduced - // accidentally by 8a4c760c204546aba566e302f299f7ed2e00e287 in 2011, so - // it may be useful to review it in the future. - return RegionRawOffsetV2(SRegion, Offset); - } - return std::nullopt; - } - return std::nullopt; -} - void ento::registerArrayBoundCheckerV2(CheckerManager &mgr) { mgr.registerChecker<ArrayBoundCheckerV2>(); } >From 59bccd98022f61e6a656dca6ac3fc5622f994226 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Don=C3=A1t=20Nagy?= <donat.n...@ericsson.com> Date: Mon, 16 Oct 2023 12:28:40 +0200 Subject: [PATCH 2/4] Clarify computeOffset() following the suggestions in the review --- .../Checkers/ArrayBoundCheckerV2.cpp | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/clang/lib/StaticAnalyzer/Checkers/ArrayBoundCheckerV2.cpp b/clang/lib/StaticAnalyzer/Checkers/ArrayBoundCheckerV2.cpp index c2ffcdf5e306d1f..0b53aa9b2051cdd 100644 --- a/clang/lib/StaticAnalyzer/Checkers/ArrayBoundCheckerV2.cpp +++ b/clang/lib/StaticAnalyzer/Checkers/ArrayBoundCheckerV2.cpp @@ -56,12 +56,14 @@ class ArrayBoundCheckerV2 : std::optional<std::pair<const SubRegion *, NonLoc>> computeOffset(ProgramStateRef State, SValBuilder &SVB, SVal Location) { QualType T = SVB.getArrayIndexType(); - auto Calc = [&SVB, State, T](BinaryOperatorKind Op, NonLoc LHS, NonLoc RHS) { + auto EvalBinOp = [&SVB, State, T](BinaryOperatorKind Op, NonLoc LHS, NonLoc RHS) { // We will use this utility to add and multiply values. return SVB.evalBinOpNN(State, Op, LHS, RHS, T).getAs<NonLoc>(); }; const auto *Region = dyn_cast_or_null<SubRegion>(Location.getAsRegion()); + // This is initialized to nullopt instead of 0 becasue we want to discard the + // situations when there are no ElementRegion layers. std::optional<NonLoc> Offset = std::nullopt; while (const auto *ERegion = dyn_cast_or_null<ElementRegion>(Region)) { @@ -70,22 +72,24 @@ computeOffset(ProgramStateRef State, SValBuilder &SVB, SVal Location) { return std::nullopt; QualType ElemType = ERegion->getElementType(); - // If the element is an incomplete type, go no further. + + // Paranoia: getTypeSizeInChars() doesn't handle incomplete types. if (ElemType->isIncompleteType()) return std::nullopt; // Calculate Delta = Index * sizeof(ElemType). NonLoc Size = SVB.makeArrayIndex( SVB.getContext().getTypeSizeInChars(ElemType).getQuantity()); - auto Delta = Calc(BO_Mul, *Index, Size); + auto Delta = EvalBinOp(BO_Mul, *Index, Size); if (!Delta) return std::nullopt; - // Perform Offset += Delta, handling the initial nullopt as 0. if (!Offset) { + // Store Delta as the first non-nullopt Offset value. Offset = Delta; } else { - Offset = Calc(BO_Add, *Offset, *Delta); + // Update the previous offset with Offset += Delta. + Offset = EvalBinOp(BO_Add, *Offset, *Delta); if (!Offset) return std::nullopt; } >From fd1015d9e71381332bb169f05ef8a8f98f03cd22 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Don=C3=A1t=20Nagy?= <donat.n...@ericsson.com> Date: Mon, 16 Oct 2023 13:00:02 +0200 Subject: [PATCH 3/4] Satisfy git-clang-format --- clang/lib/StaticAnalyzer/Checkers/ArrayBoundCheckerV2.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/clang/lib/StaticAnalyzer/Checkers/ArrayBoundCheckerV2.cpp b/clang/lib/StaticAnalyzer/Checkers/ArrayBoundCheckerV2.cpp index 0b53aa9b2051cdd..c7c41e03ea8dfcf 100644 --- a/clang/lib/StaticAnalyzer/Checkers/ArrayBoundCheckerV2.cpp +++ b/clang/lib/StaticAnalyzer/Checkers/ArrayBoundCheckerV2.cpp @@ -56,9 +56,9 @@ class ArrayBoundCheckerV2 : std::optional<std::pair<const SubRegion *, NonLoc>> computeOffset(ProgramStateRef State, SValBuilder &SVB, SVal Location) { QualType T = SVB.getArrayIndexType(); - auto EvalBinOp = [&SVB, State, T](BinaryOperatorKind Op, NonLoc LHS, NonLoc RHS) { + auto EvalBinOp = [&SVB, State, T](BinaryOperatorKind Op, NonLoc L, NonLoc R) { // We will use this utility to add and multiply values. - return SVB.evalBinOpNN(State, Op, LHS, RHS, T).getAs<NonLoc>(); + return SVB.evalBinOpNN(State, Op, L, R, T).getAs<NonLoc>(); }; const auto *Region = dyn_cast_or_null<SubRegion>(Location.getAsRegion()); >From 48e03c3addc2e8573ebfc43b516522c010420e40 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Don=C3=A1t=20Nagy?= <donat.n...@ericsson.com> Date: Fri, 20 Oct 2023 14:46:43 +0200 Subject: [PATCH 4/4] Clarify the logic of ComputeOffset() --- .../Checkers/ArrayBoundCheckerV2.cpp | 42 +++++++++---------- 1 file changed, 21 insertions(+), 21 deletions(-) diff --git a/clang/lib/StaticAnalyzer/Checkers/ArrayBoundCheckerV2.cpp b/clang/lib/StaticAnalyzer/Checkers/ArrayBoundCheckerV2.cpp index c7c41e03ea8dfcf..532aa67011194e1 100644 --- a/clang/lib/StaticAnalyzer/Checkers/ArrayBoundCheckerV2.cpp +++ b/clang/lib/StaticAnalyzer/Checkers/ArrayBoundCheckerV2.cpp @@ -61,19 +61,22 @@ computeOffset(ProgramStateRef State, SValBuilder &SVB, SVal Location) { return SVB.evalBinOpNN(State, Op, L, R, T).getAs<NonLoc>(); }; - const auto *Region = dyn_cast_or_null<SubRegion>(Location.getAsRegion()); - // This is initialized to nullopt instead of 0 becasue we want to discard the - // situations when there are no ElementRegion layers. - std::optional<NonLoc> Offset = std::nullopt; + const SubRegion *OwnerRegion = nullptr; + std::optional<NonLoc> Offset = SVB.makeZeroArrayIndex(); - while (const auto *ERegion = dyn_cast_or_null<ElementRegion>(Region)) { - const auto Index = ERegion->getIndex().getAs<NonLoc>(); + const ElementRegion *CurRegion = + dyn_cast_or_null<ElementRegion>(Location.getAsRegion()); + + while (CurRegion) { + const auto Index = CurRegion->getIndex().getAs<NonLoc>(); if (!Index) return std::nullopt; - QualType ElemType = ERegion->getElementType(); + QualType ElemType = CurRegion->getElementType(); - // Paranoia: getTypeSizeInChars() doesn't handle incomplete types. + // FIXME: The following early return was presumably added to safeguard the + // getTypeSizeInChars() call (which doesn't accept an incomplete type), but + // it seems that `ElemType` cannot be incomplete at this point. if (ElemType->isIncompleteType()) return std::nullopt; @@ -84,22 +87,19 @@ computeOffset(ProgramStateRef State, SValBuilder &SVB, SVal Location) { if (!Delta) return std::nullopt; - if (!Offset) { - // Store Delta as the first non-nullopt Offset value. - Offset = Delta; - } else { - // Update the previous offset with Offset += Delta. - Offset = EvalBinOp(BO_Add, *Offset, *Delta); - if (!Offset) - return std::nullopt; - } + // Perform Offset += Delta. + Offset = EvalBinOp(BO_Add, *Offset, *Delta); + if (!Offset) + return std::nullopt; - // Continute the offset calculations with the SuperRegion. - Region = ERegion->getSuperRegion()->getAs<SubRegion>(); + OwnerRegion = CurRegion->getSuperRegion()->getAs<SubRegion>(); + // When this is just another ElementRegion layer, we need to continue the + // offset calculations: + CurRegion = dyn_cast_or_null<ElementRegion>(OwnerRegion); } - if (Region && Offset) - return std::make_pair(Region, *Offset); + if (OwnerRegion) + return std::make_pair(OwnerRegion, *Offset); return std::nullopt; } _______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits