donat.nagy updated this revision to Diff 517831.
donat.nagy edited the summary of this revision.
donat.nagy added a comment.

As I was finalizing this commit, I noticed that the logic of 
`RegionRawOffsetV2::computeOffset()` is presented in a ~~crazy~~ unconventional 
way; so I rewrote it (while preserving the original behavior).

The most convoluted part was that the old code initialized the `offset` 
variable to `UnknownVal()`, then compensated for this choice by introducing a 
function `getValue` that always substituted zero when it encountered //this// 
`UnknownVal()` (no other effect placed `UnknownVal()` into that variable 
without immediately returning afterwards). I suspect that this initial value 
was introduced to avoid the analysis of load expressions that do not involve 
any array indexing (i.e. don't have `ElementRegion` layers), but that logic was 
broken by a commit in 2011.

@steakhal Can you provide another review for this extended version of the patch?


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D149259/new/

https://reviews.llvm.org/D149259

Files:
  clang/lib/StaticAnalyzer/Checkers/ArrayBoundCheckerV2.cpp

Index: clang/lib/StaticAnalyzer/Checkers/ArrayBoundCheckerV2.cpp
===================================================================
--- clang/lib/StaticAnalyzer/Checkers/ArrayBoundCheckerV2.cpp
+++ clang/lib/StaticAnalyzer/Checkers/ArrayBoundCheckerV2.cpp
@@ -51,21 +51,17 @@
 class RegionRawOffsetV2 {
 private:
   const SubRegion *baseRegion;
-  SVal byteOffset;
-
-  RegionRawOffsetV2()
-    : baseRegion(nullptr), byteOffset(UnknownVal()) {}
+  NonLoc byteOffset;
 
 public:
   RegionRawOffsetV2(const SubRegion *base, NonLoc offset)
       : baseRegion(base), byteOffset(offset) { assert(base); }
 
-  NonLoc getByteOffset() const { return byteOffset.castAs<NonLoc>(); }
+  NonLoc getByteOffset() const { return byteOffset; }
   const SubRegion *getRegion() const { return baseRegion; }
 
-  static RegionRawOffsetV2 computeOffset(ProgramStateRef state,
-                                         SValBuilder &svalBuilder,
-                                         SVal location);
+  static std::optional<RegionRawOffsetV2>
+  computeOffset(ProgramStateRef State, SValBuilder &SVB, SVal Location);
 
   void dump() const;
   void dumpToStream(raw_ostream &os) const;
@@ -158,16 +154,16 @@
   ProgramStateRef state = checkerContext.getState();
 
   SValBuilder &svalBuilder = checkerContext.getSValBuilder();
-  const RegionRawOffsetV2 &rawOffset =
-    RegionRawOffsetV2::computeOffset(state, svalBuilder, location);
+  const std::optional<RegionRawOffsetV2> &RawOffset =
+      RegionRawOffsetV2::computeOffset(state, svalBuilder, location);
 
-  if (!rawOffset.getRegion())
+  if (!RawOffset)
     return;
 
-  NonLoc ByteOffset = rawOffset.getByteOffset();
+  NonLoc ByteOffset = RawOffset->getByteOffset();
 
   // CHECK LOWER BOUND
-  const MemSpaceRegion *SR = rawOffset.getRegion()->getMemorySpace();
+  const MemSpaceRegion *SR = RawOffset->getRegion()->getMemorySpace();
   if (!llvm::isa<UnknownSpaceRegion>(SR)) {
     // A pointer to UnknownSpaceRegion may point to the middle of
     // an allocated region.
@@ -188,7 +184,7 @@
 
   // CHECK UPPER BOUND
   DefinedOrUnknownSVal Size =
-      getDynamicExtent(state, rawOffset.getRegion(), svalBuilder);
+      getDynamicExtent(state, RawOffset->getRegion(), svalBuilder);
   if (auto KnownSize = Size.getAs<NonLoc>()) {
     auto [state_withinUpperBound, state_exceedsUpperBound] =
         compareValueToThreshold(state, ByteOffset, *KnownSize, svalBuilder);
@@ -277,84 +273,55 @@
 }
 #endif
 
-// Lazily computes a value to be used by 'computeOffset'.  If 'val'
-// is unknown or undefined, we lazily substitute '0'.  Otherwise,
-// return 'val'.
-static inline SVal getValue(SVal val, SValBuilder &svalBuilder) {
-  return val.isUndef() ? svalBuilder.makeZeroArrayIndex() : val;
-}
-
-// Scale a base value by a scaling factor, and return the scaled
-// value as an SVal.  Used by 'computeOffset'.
-static inline SVal scaleValue(ProgramStateRef state,
-                              NonLoc baseVal, CharUnits scaling,
-                              SValBuilder &sb) {
-  return sb.evalBinOpNN(state, BO_Mul, baseVal,
-                        sb.makeArrayIndex(scaling.getQuantity()),
-                        sb.getArrayIndexType());
-}
-
-// Add an SVal to another, treating unknown and undefined values as
-// summing to UnknownVal.  Used by 'computeOffset'.
-static SVal addValue(ProgramStateRef state, SVal x, SVal y,
-                     SValBuilder &svalBuilder) {
-  // We treat UnknownVals and UndefinedVals the same here because we
-  // only care about computing offsets.
-  if (x.isUnknownOrUndef() || y.isUnknownOrUndef())
-    return UnknownVal();
-
-  return svalBuilder.evalBinOpNN(state, BO_Add, x.castAs<NonLoc>(),
-                                 y.castAs<NonLoc>(),
-                                 svalBuilder.getArrayIndexType());
-}
-
-/// Compute a raw byte offset from a base region.  Used for array bounds
-/// checking.
-RegionRawOffsetV2 RegionRawOffsetV2::computeOffset(ProgramStateRef state,
-                                                   SValBuilder &svalBuilder,
-                                                   SVal location)
-{
-  const MemRegion *region = location.getAsRegion();
-  SVal offset = UndefinedVal();
-
-  while (region) {
-    switch (region->getKind()) {
-      default: {
-        if (const SubRegion *subReg = dyn_cast<SubRegion>(region)) {
-          if (auto Offset = getValue(offset, svalBuilder).getAs<NonLoc>())
-            return RegionRawOffsetV2(subReg, *Offset);
-        }
-        return RegionRawOffsetV2();
-      }
-      case MemRegion::ElementRegionKind: {
-        const ElementRegion *elemReg = cast<ElementRegion>(region);
-        SVal index = elemReg->getIndex();
-        if (!isa<NonLoc>(index))
-          return RegionRawOffsetV2();
-        QualType elemType = elemReg->getElementType();
+/// For a given Location that can be represented as a symbolic expression
+/// Arr[Idx] (or perhaps Arr[Idx1][Idx2] etc.), return the parent memory block
+/// Arr and the distance of Location from the beginning of Arr (expressed in a
+/// NonLoc that specifies the number of CharUnits). Returns nullopt when these
+/// cannot be determined.
+std::optional<RegionRawOffsetV2>
+RegionRawOffsetV2::computeOffset(ProgramStateRef State, SValBuilder &SVB,
+                                 SVal Location) {
+  QualType T = SVB.getArrayIndexType();
+  auto Calc = [&SVB, State, T](BinaryOperatorKind Op, NonLoc LHS, NonLoc RHS) {
+    // We will use this utility to add and multiply values.
+    return SVB.evalBinOpNN(State, Op, LHS, RHS, T).getAs<NonLoc>();
+  };
+
+  const MemRegion *Region = Location.getAsRegion();
+  NonLoc Offset = SVB.makeZeroArrayIndex();
+
+  while (Region) {
+    if (const auto *ERegion = dyn_cast<ElementRegion>(Region)) {
+      if (const auto Index = ERegion->getIndex().getAs<NonLoc>()) {
+        QualType ElemType = ERegion->getElementType();
         // If the element is an incomplete type, go no further.
-        ASTContext &astContext = svalBuilder.getContext();
-        if (elemType->isIncompleteType())
-          return RegionRawOffsetV2();
-
-        // Update the offset.
-        offset = addValue(state,
-                          getValue(offset, svalBuilder),
-                          scaleValue(state,
-                          index.castAs<NonLoc>(),
-                          astContext.getTypeSizeInChars(elemType),
-                          svalBuilder),
-                          svalBuilder);
-
-        if (offset.isUnknownOrUndef())
-          return RegionRawOffsetV2();
-
-        region = elemReg->getSuperRegion();
-        continue;
+        if (ElemType->isIncompleteType())
+          return std::nullopt;
+
+        // Perform Offset += Index * sizeof(ElemType); then continue the offset
+        // calculations with SuperRegion:
+        NonLoc Size = SVB.makeArrayIndex(
+            SVB.getContext().getTypeSizeInChars(ElemType).getQuantity());
+        if (auto Delta = Calc(BO_Mul, *Index, Size)) {
+          if (auto NewOffset = Calc(BO_Add, Offset, *Delta)) {
+            Offset = *NewOffset;
+            Region = ERegion->getSuperRegion();
+            continue;
+          }
+        }
       }
+    } else if (const auto *SRegion = dyn_cast<SubRegion>(Region)) {
+      // NOTE: The dyn_cast<>() is expected to succeed, it'd be very surprising
+      // to see a MemSpaceRegion at this point.
+      // FIXME: We may return with {<Region>, 0} even if we didn't handle any
+      // ElementRegion layers. I think that this behavior was introduced
+      // accidentally by 8a4c760c204546aba566e302f299f7ed2e00e287 in 2011, so
+      // it may be useful to review it in the future.
+      return RegionRawOffsetV2(SRegion, Offset);
     }
+    return std::nullopt;
   }
-  return RegionRawOffsetV2();
+  return std::nullopt;
 }
 
 void ento::registerArrayBoundCheckerV2(CheckerManager &mgr) {
_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to