ziqingluo-90 created this revision.
ziqingluo-90 added reviewers: NoQ, jkorous, t-rasmud, malavikasamak.
Herald added a project: All.
ziqingluo-90 requested review of this revision.
Herald added a project: clang.
Herald added a subscriber: cfe-commits.

The safe-buffer analysis analyzes `TypeLoc`s of types of variable declarations 
in order to get source locations of them.

However, in some cases, the source locations of a `TypeLoc` are not valid.  
Using invalid source locations results in assertion violation or incorrect 
analysis or fix-its.

It is still not clear to me in what circumstances a `TypeLoc`  does not have 
valid source locations (it looks like a bug in Clang to me, but it is not our 
responsibility to fix it).  So we will conservatively give up the analysis when 
required source locations are not valid.


Repository:
  rG LLVM Github Monorepo

https://reviews.llvm.org/D155667

Files:
  clang/lib/Analysis/UnsafeBufferUsage.cpp
  clang/test/SemaCXX/warn-unsafe-buffer-usage-fixits-parm-unsupported.cpp


Index: clang/test/SemaCXX/warn-unsafe-buffer-usage-fixits-parm-unsupported.cpp
===================================================================
--- clang/test/SemaCXX/warn-unsafe-buffer-usage-fixits-parm-unsupported.cpp
+++ clang/test/SemaCXX/warn-unsafe-buffer-usage-fixits-parm-unsupported.cpp
@@ -33,6 +33,26 @@
   }
 }
 
+// The analysis requires accurate source location informations from
+// `TypeLoc`s of types of variable (parameter) declarations in order
+// to generate fix-its for them. But those information is not always
+// available (probably due to some bugs in clang but it is irrelevant
+// to the safe-buffer project).  The following is an example.  When
+// `_Atomic` is used, we cannot get valid source locations of the
+// pointee type of `unsigned *`.  The analysis gives up in such a
+// case.
+// CHECK-NOT: fix-it:
+void typeLocSourceLocationInvalid(_Atomic unsigned *map) { // 
expected-warning{{'map' is an unsafe pointer used for buffer access}}
+  map[5] = 5; // expected-note{{used in buffer access here}}
+}
+
+// CHECK: fix-it:"{{.*}}":{[[@LINE+1]]:33-[[@LINE+1]]:46}:"std::span<unsigned> 
map"
+void typeLocSourceLocationValid(unsigned *map) { // expected-warning{{'map' is 
an unsafe pointer used for buffer access}} \
+                                                   expected-note{{change type 
of 'map' to 'std::span' to preserve bounds information}}
+  map[5] = 5; // expected-note{{used in buffer access here}}
+}
+// CHECK: 
fix-it:"{{.*}}":{[[@LINE-1]]:2-[[@LINE-1]]:2}:"\n{{\[}}{{\[}}clang::unsafe_buffer_usage{{\]}}{{\]}}
 void typeLocSourceLocationValid(unsigned *map) {return 
typeLocSourceLocationValid(std::span<unsigned>(map, <# size #>));}\n"
+
 // We do not fix parameters participating unsafe operations for the
 // following functions/methods or function-like expressions:
 
@@ -128,4 +148,3 @@
   int tmp;
   tmp = x[5]; // expected-note{{used in buffer access here}}
 }
-
Index: clang/lib/Analysis/UnsafeBufferUsage.cpp
===================================================================
--- clang/lib/Analysis/UnsafeBufferUsage.cpp
+++ clang/lib/Analysis/UnsafeBufferUsage.cpp
@@ -1385,8 +1385,18 @@
   TypeLoc PteTyLoc = TyLoc.getUnqualifiedLoc().getNextTypeLoc();
   SourceLocation VDNameStartLoc = VD->getLocation();
 
-  if (!SM.isBeforeInTranslationUnit(PteTyLoc.getSourceRange().getEnd(),
-                                    VDNameStartLoc)) {
+  if (!(VDNameStartLoc.isValid() && PteTyLoc.getSourceRange().isValid())) {
+    // We are expecting these locations to be valid. But in some cases, they 
are
+    // not all valid. It is a Clang bug to me and we are not responsible for
+    // fixing it.  So we will just give up for now when it happens.
+    return std::nullopt;
+  }
+
+  // Note that TypeLoc.getEndLoc() returns the begin location of the last 
token:
+  SourceLocation PteEndOfTokenLoc =
+      Lexer::getLocForEndOfToken(PteTyLoc.getEndLoc(), 0, SM, LangOpts);
+
+  if (!SM.isBeforeInTranslationUnit(PteEndOfTokenLoc, VDNameStartLoc)) {
     // We only deal with the cases where the source text of the pointee type
     // appears on the left-hand side of the variable identifier completely,
     // including the following forms:
@@ -1401,13 +1411,8 @@
     // `PteTy` via source ranges.
     *QualifiersToAppend = PteTy.getQualifiers();
   }
-
-  // Note that TypeLoc.getEndLoc() returns the begin location of the last 
token:
-  SourceRange CSR{
-      PteTyLoc.getBeginLoc(),
-      Lexer::getLocForEndOfToken(PteTyLoc.getEndLoc(), 0, SM, LangOpts)};
-
-  return getRangeText(CSR, SM, LangOpts)->str();
+  return getRangeText({PteTyLoc.getBeginLoc(), PteEndOfTokenLoc}, SM, LangOpts)
+      ->str();
 }
 
 // Returns the text of the name (with qualifiers) of a `FunctionDecl`.


Index: clang/test/SemaCXX/warn-unsafe-buffer-usage-fixits-parm-unsupported.cpp
===================================================================
--- clang/test/SemaCXX/warn-unsafe-buffer-usage-fixits-parm-unsupported.cpp
+++ clang/test/SemaCXX/warn-unsafe-buffer-usage-fixits-parm-unsupported.cpp
@@ -33,6 +33,26 @@
   }
 }
 
+// The analysis requires accurate source location informations from
+// `TypeLoc`s of types of variable (parameter) declarations in order
+// to generate fix-its for them. But those information is not always
+// available (probably due to some bugs in clang but it is irrelevant
+// to the safe-buffer project).  The following is an example.  When
+// `_Atomic` is used, we cannot get valid source locations of the
+// pointee type of `unsigned *`.  The analysis gives up in such a
+// case.
+// CHECK-NOT: fix-it:
+void typeLocSourceLocationInvalid(_Atomic unsigned *map) { // expected-warning{{'map' is an unsafe pointer used for buffer access}}
+  map[5] = 5; // expected-note{{used in buffer access here}}
+}
+
+// CHECK: fix-it:"{{.*}}":{[[@LINE+1]]:33-[[@LINE+1]]:46}:"std::span<unsigned> map"
+void typeLocSourceLocationValid(unsigned *map) { // expected-warning{{'map' is an unsafe pointer used for buffer access}} \
+						    expected-note{{change type of 'map' to 'std::span' to preserve bounds information}}
+  map[5] = 5; // expected-note{{used in buffer access here}}
+}
+// CHECK: fix-it:"{{.*}}":{[[@LINE-1]]:2-[[@LINE-1]]:2}:"\n{{\[}}{{\[}}clang::unsafe_buffer_usage{{\]}}{{\]}} void typeLocSourceLocationValid(unsigned *map) {return typeLocSourceLocationValid(std::span<unsigned>(map, <# size #>));}\n"
+
 // We do not fix parameters participating unsafe operations for the
 // following functions/methods or function-like expressions:
 
@@ -128,4 +148,3 @@
   int tmp;
   tmp = x[5]; // expected-note{{used in buffer access here}}
 }
-
Index: clang/lib/Analysis/UnsafeBufferUsage.cpp
===================================================================
--- clang/lib/Analysis/UnsafeBufferUsage.cpp
+++ clang/lib/Analysis/UnsafeBufferUsage.cpp
@@ -1385,8 +1385,18 @@
   TypeLoc PteTyLoc = TyLoc.getUnqualifiedLoc().getNextTypeLoc();
   SourceLocation VDNameStartLoc = VD->getLocation();
 
-  if (!SM.isBeforeInTranslationUnit(PteTyLoc.getSourceRange().getEnd(),
-                                    VDNameStartLoc)) {
+  if (!(VDNameStartLoc.isValid() && PteTyLoc.getSourceRange().isValid())) {
+    // We are expecting these locations to be valid. But in some cases, they are
+    // not all valid. It is a Clang bug to me and we are not responsible for
+    // fixing it.  So we will just give up for now when it happens.
+    return std::nullopt;
+  }
+
+  // Note that TypeLoc.getEndLoc() returns the begin location of the last token:
+  SourceLocation PteEndOfTokenLoc =
+      Lexer::getLocForEndOfToken(PteTyLoc.getEndLoc(), 0, SM, LangOpts);
+
+  if (!SM.isBeforeInTranslationUnit(PteEndOfTokenLoc, VDNameStartLoc)) {
     // We only deal with the cases where the source text of the pointee type
     // appears on the left-hand side of the variable identifier completely,
     // including the following forms:
@@ -1401,13 +1411,8 @@
     // `PteTy` via source ranges.
     *QualifiersToAppend = PteTy.getQualifiers();
   }
-
-  // Note that TypeLoc.getEndLoc() returns the begin location of the last token:
-  SourceRange CSR{
-      PteTyLoc.getBeginLoc(),
-      Lexer::getLocForEndOfToken(PteTyLoc.getEndLoc(), 0, SM, LangOpts)};
-
-  return getRangeText(CSR, SM, LangOpts)->str();
+  return getRangeText({PteTyLoc.getBeginLoc(), PteEndOfTokenLoc}, SM, LangOpts)
+      ->str();
 }
 
 // Returns the text of the name (with qualifiers) of a `FunctionDecl`.
_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to