Author: DonĂ¡t Nagy
Date: 2025-09-17T13:18:13+02:00
New Revision: 5cbaf5552e9cab25dc3eb760ffb5ab7d3dd59249

URL: 
https://github.com/llvm/llvm-project/commit/5cbaf5552e9cab25dc3eb760ffb5ab7d3dd59249
DIFF: 
https://github.com/llvm/llvm-project/commit/5cbaf5552e9cab25dc3eb760ffb5ab7d3dd59249.diff

LOG: [analyzer] Show element count in ArrayBound underflow reports (#158639)

The underflow reports of checker security.ArrayBound previously
displayed the (negative) byte offset of the accessed location; but those
numbers were a bit hard to decipher (especially when the elements were
large structs), so this commit replaces the byte offset with an index
value (if the offset is an integer multiple of the size of the accessed
element).

This change only affects the content of the messages; the checker will
find the same issues before and after this commit.

Added: 
    

Modified: 
    clang/lib/StaticAnalyzer/Checkers/ArrayBoundChecker.cpp
    clang/test/Analysis/ArrayBound/assumption-reporting.c
    clang/test/Analysis/ArrayBound/verbose-tests.c

Removed: 
    


################################################################################
diff  --git a/clang/lib/StaticAnalyzer/Checkers/ArrayBoundChecker.cpp 
b/clang/lib/StaticAnalyzer/Checkers/ArrayBoundChecker.cpp
index d35031b5c22df..6ad5acd4e76f2 100644
--- a/clang/lib/StaticAnalyzer/Checkers/ArrayBoundChecker.cpp
+++ b/clang/lib/StaticAnalyzer/Checkers/ArrayBoundChecker.cpp
@@ -130,6 +130,20 @@ struct Messages {
   std::string Short, Full;
 };
 
+enum class BadOffsetKind { Negative, Overflowing, Indeterminate };
+
+constexpr llvm::StringLiteral Adjectives[] = {"a negative", "an overflowing",
+                                              "a negative or overflowing"};
+static StringRef asAdjective(BadOffsetKind Problem) {
+  return Adjectives[static_cast<int>(Problem)];
+}
+
+constexpr llvm::StringLiteral Prepositions[] = {"preceding", "after the end 
of",
+                                                "around"};
+static StringRef asPreposition(BadOffsetKind Problem) {
+  return Prepositions[static_cast<int>(Problem)];
+}
+
 // NOTE: The `ArraySubscriptExpr` and `UnaryOperator` callbacks are `PostStmt`
 // instead of `PreStmt` because the current implementation passes the whole
 // expression to `CheckerContext::getSVal()` which only works after the
@@ -388,18 +402,6 @@ static std::optional<int64_t> 
getConcreteValue(std::optional<NonLoc> SV) {
   return SV ? getConcreteValue(*SV) : std::nullopt;
 }
 
-static Messages getPrecedesMsgs(const MemSpaceRegion *Space,
-                                const SubRegion *Region, NonLoc Offset) {
-  std::string RegName = getRegionName(Space, Region), OffsetStr = "";
-
-  if (auto ConcreteOffset = getConcreteValue(Offset))
-    OffsetStr = formatv(" {0}", ConcreteOffset);
-
-  return {
-      formatv("Out of bound access to memory preceding {0}", RegName),
-      formatv("Access of {0} at negative byte offset{1}", RegName, OffsetStr)};
-}
-
 /// Try to divide `Val1` and `Val2` (in place) by `Divisor` and return true if
 /// it can be performed (`Divisor` is nonzero and there is no remainder). The
 /// values `Val1` and `Val2` may be nullopt and in that case the corresponding
@@ -419,10 +421,11 @@ static bool tryDividePair(std::optional<int64_t> &Val1,
   return true;
 }
 
-static Messages getExceedsMsgs(ASTContext &ACtx, const MemSpaceRegion *Space,
-                               const SubRegion *Region, NonLoc Offset,
-                               NonLoc Extent, SVal Location,
-                               bool AlsoMentionUnderflow) {
+static Messages getNonTaintMsgs(const ASTContext &ACtx,
+                                const MemSpaceRegion *Space,
+                                const SubRegion *Region, NonLoc Offset,
+                                std::optional<NonLoc> Extent, SVal Location,
+                                BadOffsetKind Problem) {
   std::string RegName = getRegionName(Space, Region);
   const auto *EReg = Location.getAsRegion()->getAs<ElementRegion>();
   assert(EReg && "this checker only handles element access");
@@ -439,15 +442,21 @@ static Messages getExceedsMsgs(ASTContext &ACtx, const 
MemSpaceRegion *Space,
   SmallString<256> Buf;
   llvm::raw_svector_ostream Out(Buf);
   Out << "Access of ";
-  if (!ExtentN && !UseByteOffsets)
+  if (OffsetN && !ExtentN && !UseByteOffsets) {
+    // If the offset is reported as an index, then the report must mention the
+    // element type (because it is not always clear from the code). It's more
+    // natural to mention the element type later where the extent is described,
+    // but if the extent is unknown/irrelevant, then the element type can be
+    // inserted into the message at this point.
     Out << "'" << ElemType.getAsString() << "' element in ";
+  }
   Out << RegName << " at ";
-  if (AlsoMentionUnderflow) {
-    Out << "a negative or overflowing " << OffsetOrIndex;
-  } else if (OffsetN) {
+  if (OffsetN) {
+    if (Problem == BadOffsetKind::Negative)
+      Out << "negative ";
     Out << OffsetOrIndex << " " << *OffsetN;
   } else {
-    Out << "an overflowing " << OffsetOrIndex;
+    Out << asAdjective(Problem) << " " << OffsetOrIndex;
   }
   if (ExtentN) {
     Out << ", while it holds only ";
@@ -465,8 +474,7 @@ static Messages getExceedsMsgs(ASTContext &ACtx, const 
MemSpaceRegion *Space,
   }
 
   return {formatv("Out of bound access to memory {0} {1}",
-                  AlsoMentionUnderflow ? "around" : "after the end of",
-                  RegName),
+                  asPreposition(Problem), RegName),
           std::string(Buf)};
 }
 
@@ -635,7 +643,9 @@ void ArrayBoundChecker::performCheck(const Expr *E, 
CheckerContext &C) const {
       } else {
         if (!WithinLowerBound) {
           // ...and it cannot be valid (>= 0), so report an error.
-          Messages Msgs = getPrecedesMsgs(Space, Reg, ByteOffset);
+          Messages Msgs = getNonTaintMsgs(C.getASTContext(), Space, Reg,
+                                          ByteOffset, /*Extent=*/std::nullopt,
+                                          Location, BadOffsetKind::Negative);
           reportOOB(C, PrecedesLowerBound, Msgs, ByteOffset, std::nullopt);
           return;
         }
@@ -677,9 +687,12 @@ void ArrayBoundChecker::performCheck(const Expr *E, 
CheckerContext &C) const {
           return;
         }
 
+        BadOffsetKind Problem = AlsoMentionUnderflow
+                                    ? BadOffsetKind::Indeterminate
+                                    : BadOffsetKind::Overflowing;
         Messages Msgs =
-            getExceedsMsgs(C.getASTContext(), Space, Reg, ByteOffset,
-                           *KnownSize, Location, AlsoMentionUnderflow);
+            getNonTaintMsgs(C.getASTContext(), Space, Reg, ByteOffset,
+                            *KnownSize, Location, Problem);
         reportOOB(C, ExceedsUpperBound, Msgs, ByteOffset, KnownSize);
         return;
       }

diff  --git a/clang/test/Analysis/ArrayBound/assumption-reporting.c 
b/clang/test/Analysis/ArrayBound/assumption-reporting.c
index 535e623baa815..bffd5d9bc35b5 100644
--- a/clang/test/Analysis/ArrayBound/assumption-reporting.c
+++ b/clang/test/Analysis/ArrayBound/assumption-reporting.c
@@ -70,7 +70,7 @@ int assumingUpper(int arg) {
   // expected-note@-1 {{Assuming index is less than 10, the number of 'int' 
elements in 'TenElements'}}
   int b = TenElements[arg - 10];
   // expected-warning@-1 {{Out of bound access to memory preceding 
'TenElements'}}
-  // expected-note@-2 {{Access of 'TenElements' at negative byte offset}}
+  // expected-note@-2 {{Access of 'TenElements' at a negative index}}
   return a + b;
 }
 
@@ -99,7 +99,7 @@ int assumingUpperUnsigned(unsigned arg) {
   // expected-note@-1 {{Assuming index is less than 10, the number of 'int' 
elements in 'TenElements'}}
   int b = TenElements[(int)arg - 10];
   // expected-warning@-1 {{Out of bound access to memory preceding 
'TenElements'}}
-  // expected-note@-2 {{Access of 'TenElements' at negative byte offset}}
+  // expected-note@-2 {{Access of 'TenElements' at a negative index}}
   return a + b;
 }
 
@@ -111,7 +111,7 @@ int assumingNothing(unsigned arg) {
   int a = TenElements[arg]; // no note here, we already know that 'arg' is in 
bounds
   int b = TenElements[(int)arg - 10];
   // expected-warning@-1 {{Out of bound access to memory preceding 
'TenElements'}}
-  // expected-note@-2 {{Access of 'TenElements' at negative byte offset}}
+  // expected-note@-2 {{Access of 'TenElements' at a negative index}}
   return a + b;
 }
 
@@ -145,7 +145,7 @@ int assumingConvertedToIntP(struct foo f, int arg) {
   // expected-note@-1 {{Assuming byte offset is less than 5, the extent of 
'f.b'}}
   int c = TenElements[arg-2];
   // expected-warning@-1 {{Out of bound access to memory preceding 
'TenElements'}}
-  // expected-note@-2 {{Access of 'TenElements' at negative byte offset}}
+  // expected-note@-2 {{Access of 'TenElements' at a negative index}}
   return a + b + c;
 }
 

diff  --git a/clang/test/Analysis/ArrayBound/verbose-tests.c 
b/clang/test/Analysis/ArrayBound/verbose-tests.c
index 84d238ed1a2a4..e3416886d13e5 100644
--- a/clang/test/Analysis/ArrayBound/verbose-tests.c
+++ b/clang/test/Analysis/ArrayBound/verbose-tests.c
@@ -11,7 +11,7 @@ int TenElements[10];
 void arrayUnderflow(void) {
   TenElements[-3] = 5;
   // expected-warning@-1 {{Out of bound access to memory preceding 
'TenElements'}}
-  // expected-note@-2 {{Access of 'TenElements' at negative byte offset -12}}
+  // expected-note@-2 {{Access of 'int' element in 'TenElements' at negative 
index -3}}
 }
 
 int underflowWithDeref(void) {
@@ -19,9 +19,39 @@ int underflowWithDeref(void) {
   --p;
   return *p;
   // expected-warning@-1 {{Out of bound access to memory preceding 
'TenElements'}}
-  // expected-note@-2 {{Access of 'TenElements' at negative byte offset -4}}
+  // expected-note@-2 {{Access of 'int' element in 'TenElements' at negative 
index -1}}
+}
+
+char underflowReportedAsChar(void) {
+  // Underflow is reported with the type of the accessed element (here 'char'),
+  // not the type that appears in the declaration of the original array (which
+  // would be 'int').
+  return ((char *)TenElements)[-1];
+  // expected-warning@-1 {{Out of bound access to memory preceding 
'TenElements'}}
+  // expected-note@-2 {{Access of 'char' element in 'TenElements' at negative 
index -1}}
 }
 
+struct TwoInts {
+  int a, b;
+};
+
+struct TwoInts underflowReportedAsStruct(void) {
+  // Another case where the accessed type is used for reporting the offset.
+  return *(struct TwoInts*)(TenElements - 4);
+  // expected-warning@-1 {{Out of bound access to memory preceding 
'TenElements'}}
+  // expected-note@-2 {{Access of 'struct TwoInts' element in 'TenElements' at 
negative index -2}}
+}
+
+struct TwoInts underflowOnlyByteOffset(void) {
+  // In this case the negative byte offset is not a multiple of the size of the
+  // accessed element, so the part "= -... * sizeof(type)" is omitted at the
+  // end of the message.
+  return *(struct TwoInts*)(TenElements - 3);
+  // expected-warning@-1 {{Out of bound access to memory preceding 
'TenElements'}}
+  // expected-note@-2 {{Access of 'TenElements' at negative byte offset -12}}
+}
+
+
 int rng(void);
 int getIndex(void) {
   switch (rng()) {
@@ -40,10 +70,10 @@ void gh86959(void) {
   while (rng())
     TenElements[getIndex()] = 10;
   // expected-warning@-1 {{Out of bound access to memory preceding 
'TenElements'}}
-  // expected-note@-2 {{Access of 'TenElements' at negative byte offset -688}}
+  // expected-note@-2 {{Access of 'int' element in 'TenElements' at negative 
index -172}}
 }
 
-int scanf(const char *restrict fmt, ...);
+int scanf(const char *fmt, ...);
 
 void taintedIndex(void) {
   int index;


        
_______________________________________________
cfe-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to