This revision was landed with ongoing or failed builds.
This revision was automatically updated to reflect the committed changes.
Closed by commit rG8086323a91b5: [-Wunsafe-buffer-usage] NFC: Implement 
fix-strategies and variable-use-claiming. (authored by Artem Dergachev 
<adergac...@apple.com>).
Herald added a project: clang.

Changed prior to commit:
  https://reviews.llvm.org/D138253?vs=480397&id=483705#toc

Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D138253/new/

https://reviews.llvm.org/D138253

Files:
  clang/include/clang/Analysis/Analyses/UnsafeBufferUsage.h
  clang/include/clang/Basic/DiagnosticGroups.td
  clang/include/clang/Basic/DiagnosticSemaKinds.td
  clang/lib/Analysis/UnsafeBufferUsage.cpp
  clang/lib/Sema/AnalysisBasedWarnings.cpp

Index: clang/lib/Sema/AnalysisBasedWarnings.cpp
===================================================================
--- clang/lib/Sema/AnalysisBasedWarnings.cpp
+++ clang/lib/Sema/AnalysisBasedWarnings.cpp
@@ -2150,9 +2150,18 @@
   UnsafeBufferUsageReporter(Sema &S) : S(S) {}
 
   void handleUnsafeOperation(const Stmt *Operation) override {
-    S.Diag(Operation->getBeginLoc(), diag::warn_unsafe_buffer_usage)
+    S.Diag(Operation->getBeginLoc(), diag::warn_unsafe_buffer_expression)
         << Operation->getSourceRange();
   }
+
+  void handleFixableVariable(const VarDecl *Variable,
+                             FixItList &&Fixes) override {
+    const auto &D =
+        S.Diag(Variable->getBeginLoc(), diag::warn_unsafe_buffer_variable);
+    D << Variable << Variable->getSourceRange();
+    for (const auto &F: Fixes)
+      D << F;
+  }
 };
 
 
@@ -2449,7 +2458,8 @@
         checkThrowInNonThrowingFunc(S, FD, AC);
 
   // Emit unsafe buffer usage warnings and fixits.
-  if (!Diags.isIgnored(diag::warn_unsafe_buffer_usage, D->getBeginLoc())) {
+  if (!Diags.isIgnored(diag::warn_unsafe_buffer_expression, D->getBeginLoc()) ||
+      !Diags.isIgnored(diag::warn_unsafe_buffer_variable, D->getBeginLoc())) {
     UnsafeBufferUsageReporter R(S);
     checkUnsafeBufferUsage(D, R);
   }
Index: clang/lib/Analysis/UnsafeBufferUsage.cpp
===================================================================
--- clang/lib/Analysis/UnsafeBufferUsage.cpp
+++ clang/lib/Analysis/UnsafeBufferUsage.cpp
@@ -14,6 +14,18 @@
 using namespace clang;
 using namespace ast_matchers;
 
+namespace {
+// Because the analysis revolves around variables and their types, we'll need to
+// track uses of variables (aka DeclRefExprs).
+using DeclUseList = SmallVector<const DeclRefExpr *, 1>;
+
+// Convenience typedef.
+using FixItList = SmallVector<FixItHint, 4>;
+
+// Defined below.
+class Strategy;
+} // namespace
+
 // Because we're dealing with raw pointers, let's define what we mean by that.
 static auto hasPointerType() {
   return anyOf(hasType(pointerType()),
@@ -49,6 +61,18 @@
   virtual bool isSafe() const = 0;
   virtual const Stmt *getBaseStmt() const = 0;
 
+  /// Returns the list of pointer-type variables on which this gadget performs
+  /// its operation. Typically, there's only one variable. This isn't a list
+  /// of all DeclRefExprs in the gadget's AST!
+  virtual DeclUseList getClaimedVarUseSites() const = 0;
+
+  /// Returns a fixit that would fix the current gadget according to
+  /// the current strategy. Returns None if the fix cannot be produced;
+  /// returns an empty list if no fixes are necessary.
+  virtual std::optional<FixItList> getFixits(const Strategy &) const {
+    return std::nullopt;
+  }
+
   virtual ~Gadget() = default;
 
 private:
@@ -103,6 +127,16 @@
   }
 
   const UnaryOperator *getBaseStmt() const override { return Op; }
+
+  DeclUseList getClaimedVarUseSites() const override {
+    SmallVector<const DeclRefExpr *, 2> Uses;
+    if (const auto *DRE =
+            dyn_cast<DeclRefExpr>(Op->getSubExpr()->IgnoreParenImpCasts())) {
+      Uses.push_back(DRE);
+    }
+
+    return std::move(Uses);
+  }
 };
 
 /// A decrement of a pointer-type value is unsafe as it may run the pointer
@@ -128,6 +162,15 @@
   }
 
   const UnaryOperator *getBaseStmt() const override { return Op; }
+
+  DeclUseList getClaimedVarUseSites() const override {
+    if (const auto *DRE =
+            dyn_cast<DeclRefExpr>(Op->getSubExpr()->IgnoreParenImpCasts())) {
+      return {DRE};
+    }
+
+    return {};
+  }
 };
 
 /// Array subscript expressions on raw pointers as if they're arrays. Unsafe as
@@ -154,17 +197,115 @@
   }
 
   const ArraySubscriptExpr *getBaseStmt() const override { return ASE; }
+
+  DeclUseList getClaimedVarUseSites() const override {
+    if (const auto *DRE =
+            dyn_cast<DeclRefExpr>(ASE->getBase()->IgnoreParenImpCasts())) {
+      return {DRE};
+    }
+
+    return {};
+  }
 };
 } // namespace
 
-// Scan the function and return a list of gadgets found with provided kits.
-static GadgetList findGadgets(const Decl *D) {
+namespace {
+// An auxiliary tracking facility for the fixit analysis. It helps connect
+// declarations to its and make sure we've covered all uses with our analysis
+// before we try to fix the declaration.
+class DeclUseTracker {
+  using UseSetTy = SmallSet<const DeclRefExpr *, 16>;
+  using DefMapTy = DenseMap<const VarDecl *, const DeclStmt *>;
+
+  // Allocate on the heap for easier move.
+  std::unique_ptr<UseSetTy> Uses{std::make_unique<UseSetTy>()};
+  DefMapTy Defs{};
 
-  class GadgetFinderCallback : public MatchFinder::MatchCallback {
-    GadgetList &Output;
+public:
+  DeclUseTracker() = default;
+  DeclUseTracker(const DeclUseTracker &) = delete; // Let's avoid copies.
+  DeclUseTracker(DeclUseTracker &&) = default;
+
+  // Start tracking a freshly discovered DRE.
+  void discoverUse(const DeclRefExpr *DRE) { Uses->insert(DRE); }
+
+  // Stop tracking the DRE as it's been fully figured out.
+  void claimUse(const DeclRefExpr *DRE) {
+    assert(Uses->count(DRE) &&
+           "DRE not found or claimed by multiple matchers!");
+    Uses->erase(DRE);
+  }
+
+  // A variable is unclaimed if at least one use is unclaimed.
+  bool hasUnclaimedUses(const VarDecl *VD) const {
+    // FIXME: Can this be less linear? Maybe maintain a map from VDs to DREs?
+    return any_of(*Uses, [VD](const DeclRefExpr *DRE) {
+      return DRE->getDecl()->getCanonicalDecl() == VD->getCanonicalDecl();
+    });
+  }
+
+  void discoverDecl(const DeclStmt *DS) {
+    for (const Decl *D : DS->decls()) {
+      if (const auto *VD = dyn_cast<VarDecl>(D)) {
+        assert(Defs.count(VD) == 0 && "Definition already discovered!");
+        Defs[VD] = DS;
+      }
+    }
+  }
 
-  public:
-    GadgetFinderCallback(GadgetList &Output) : Output(Output) {}
+  const DeclStmt *lookupDecl(const VarDecl *VD) const {
+    auto It = Defs.find(VD);
+    assert(It != Defs.end() && "Definition never discovered!");
+    return It->second;
+  }
+};
+} // namespace
+
+namespace {
+// Strategy is a map from variables to the way we plan to emit fixes for
+// these variables. It is figured out gradually by trying different fixes
+// for different variables depending on gadgets in which these variables
+// participate.
+class Strategy {
+public:
+  enum class Kind {
+    Wontfix,    // We don't plan to emit a fixit for this variable.
+    Span,       // We recommend replacing the variable with std::span.
+    Iterator,   // We recommend replacing the variable with std::span::iterator.
+    Array,      // We recommend replacing the variable with std::array.
+    Vector      // We recommend replacing the variable with std::vector.
+  };
+
+private:
+  using MapTy = llvm::DenseMap<const VarDecl *, Kind>;
+
+  MapTy Map;
+
+public:
+  Strategy() = default;
+  Strategy(const Strategy &) = delete; // Let's avoid copies.
+  Strategy(Strategy &&) = default;
+
+  void set(const VarDecl *VD, Kind K) {
+    Map[VD] = K;
+  }
+
+  Kind lookup(const VarDecl *VD) const {
+    auto I = Map.find(VD);
+    if (I == Map.end())
+      return Kind::Wontfix;
+
+    return I->second;
+  }
+};
+} // namespace
+
+/// Scan the function and return a list of gadgets found with provided kits.
+static std::pair<GadgetList, DeclUseTracker> findGadgets(const Decl *D) {
+
+  struct GadgetFinderCallback : MatchFinder::MatchCallback {
+    GadgetList Gadgets;
+    DeclUseTracker Tracker;
 
     void run(const MatchFinder::MatchResult &Result) override {
       // In debug mode, assert that we've found exactly one gadget.
@@ -176,12 +317,22 @@
 #define NEXT ++numFound
 #endif
 
+      if (const auto *DRE = Result.Nodes.getNodeAs<DeclRefExpr>("any_dre")) {
+        Tracker.discoverUse(DRE);
+        NEXT;
+      }
+
+      if (const auto *DS = Result.Nodes.getNodeAs<DeclStmt>("any_ds")) {
+        Tracker.discoverDecl(DS);
+        NEXT;
+      }
+
       // Figure out which matcher we've found, and call the appropriate
       // subclass constructor.
       // FIXME: Can we do this more logarithmically?
 #define GADGET(name)                                                           \
       if (Result.Nodes.getNodeAs<Stmt>(#name)) {                               \
-        Output.push_back(std::make_unique<name ## Gadget>(Result));            \
+        Gadgets.push_back(std::make_unique<name ## Gadget>(Result));           \
         NEXT;                                                                  \
       }
 #include "clang/Analysis/Analyses/UnsafeBufferUsageGadgets.def"
@@ -191,9 +342,8 @@
     }
   };
 
-  GadgetList G;
   MatchFinder M;
-  GadgetFinderCallback CB(G);
+  GadgetFinderCallback CB;
 
   // clang-format off
   M.addMatcher(
@@ -203,8 +353,13 @@
 #define GADGET(x)                                                              \
         x ## Gadget::matcher().bind(#x),
 #include "clang/Analysis/Analyses/UnsafeBufferUsageGadgets.def"
-        // FIXME: Is there a better way to avoid hanging comma?
-        unless(stmt())
+        // In parallel, match all DeclRefExprs so that to find out
+        // whether there are any uncovered by gadgets.
+        declRefExpr(hasPointerType(), to(varDecl())).bind("any_dre"),
+        // Also match DeclStmts because we'll need them when fixing
+        // their underlying VarDecls that otherwise don't have
+        // any backreferences to DeclStmts.
+        declStmt().bind("any_ds")
       ))
       // FIXME: Idiomatically there should be a forCallable(equalsNode(D))
       // here, to make sure that the statement actually belongs to the
@@ -219,15 +374,97 @@
 
   M.match(*D->getBody(), D->getASTContext());
 
-  return G; // NRVO!
+  // Gadgets "claim" variables they're responsible for. Once this loop finishes,
+  // the tracker will only track DREs that weren't claimed by any gadgets,
+  // i.e. not understood by the analysis.
+  for (const auto &G : CB.Gadgets) {
+    for (const auto *DRE : G->getClaimedVarUseSites()) {
+      CB.Tracker.claimUse(DRE);
+    }
+  }
+
+  return {std::move(CB.Gadgets), std::move(CB.Tracker)};
 }
 
 void clang::checkUnsafeBufferUsage(const Decl *D,
                                    UnsafeBufferUsageHandler &Handler) {
   assert(D && D->getBody());
 
-  GadgetList Gadgets = findGadgets(D);
+  SmallSet<const VarDecl *, 8> WarnedDecls;
+
+  auto [Gadgets, Tracker] = findGadgets(D);
+
+  DenseMap<const VarDecl *, std::vector<const Gadget *>> Map;
+
+  // First, let's sort gadgets by variables. If some gadgets cover more than one
+  // variable, they'll appear more than once in the map.
   for (const auto &G : Gadgets) {
-    Handler.handleUnsafeOperation(G->getBaseStmt());
+    DeclUseList ClaimedVarUseSites = G->getClaimedVarUseSites();
+
+    // Populate the map.
+    bool Pushed = false;
+    for (const DeclRefExpr *DRE : ClaimedVarUseSites) {
+      if (const auto *VD = dyn_cast<VarDecl>(DRE->getDecl())) {
+        Map[VD].push_back(G.get());
+        Pushed = true;
+      }
+    }
+
+    if (!Pushed && !G->isSafe()) {
+      // We won't return to this gadget later. Emit the warning right away.
+      Handler.handleUnsafeOperation(G->getBaseStmt());
+      continue;
+    }
+  }
+
+  Strategy S;
+
+  for (const auto &[VD, VDGadgets] : Map) {
+
+    // If the variable has no unsafe gadgets, skip it entirely.
+    if (!any_of(VDGadgets, [](const Gadget *G) { return !G->isSafe(); }))
+      continue;
+
+    std::optional<FixItList> Fixes = std::nullopt;
+
+    // Avoid suggesting fixes if not all uses of the variable are identified
+    // as known gadgets.
+    // FIXME: Support parameter variables as well.
+    if (!Tracker.hasUnclaimedUses(VD) && VD->isLocalVarDecl()) {
+      // Choose the appropriate strategy. FIXME: We should try different
+      // strategies.
+      S.set(VD, Strategy::Kind::Span);
+
+      // Check if it works.
+      // FIXME: This isn't sufficient (or even correct) when a gadget has
+      // already produced a fixit for a different variable i.e. it was mentioned
+      // in the map twice (or more). In such case the correct thing to do is
+      // to undo the previous fix first, and then if we can't produce the new
+      // fix for both variables, revert to the old one.
+      Fixes = FixItList{};
+      for (const Gadget *G : VDGadgets) {
+        std::optional<FixItList> F = G->getFixits(S);
+        if (!F) {
+          Fixes = std::nullopt;
+          break;
+        }
+
+        for (auto &&Fixit: *F)
+          Fixes->push_back(std::move(Fixit));
+      }
+    }
+
+    if (Fixes) {
+      // If we reach this point, the strategy is applicable.
+      Handler.handleFixableVariable(VD, std::move(*Fixes));
+    } else {
+      // The strategy has failed. Emit the warning without the fixit.
+      S.set(VD, Strategy::Kind::Wontfix);
+      for (const Gadget *G : VDGadgets) {
+        if (!G->isSafe()) {
+          Handler.handleUnsafeOperation(G->getBaseStmt());
+        }
+      }
+    }
   }
 }
Index: clang/include/clang/Basic/DiagnosticSemaKinds.td
===================================================================
--- clang/include/clang/Basic/DiagnosticSemaKinds.td
+++ clang/include/clang/Basic/DiagnosticSemaKinds.td
@@ -11756,7 +11756,10 @@
   "this builtin requires target: loongarch64">;
 
 // Unsafe buffer usage diagnostics.
-def warn_unsafe_buffer_usage : Warning<
+def warn_unsafe_buffer_expression : Warning<
   "unchecked operation on raw buffer in expression">,
-  InGroup<DiagGroup<"unsafe-buffer-usage">>, DefaultIgnore;
+  InGroup<UnsafeBufferUsage>, DefaultIgnore;
+def warn_unsafe_buffer_variable : Warning<
+  "variable %0 participates in unchecked buffer operations">,
+  InGroup<UnsafeBufferUsage>, DefaultIgnore;
 } // end of sema component.
Index: clang/include/clang/Basic/DiagnosticGroups.td
===================================================================
--- clang/include/clang/Basic/DiagnosticGroups.td
+++ clang/include/clang/Basic/DiagnosticGroups.td
@@ -1393,3 +1393,6 @@
 
 // Warnings and notes related to const_var_decl_type attribute checks
 def ReadOnlyPlacementChecks : DiagGroup<"read-only-types">;
+
+// Warnings and fixes to support the "safe buffers" programming model.
+def UnsafeBufferUsage : DiagGroup<"unsafe-buffer-usage">;
Index: clang/include/clang/Analysis/Analyses/UnsafeBufferUsage.h
===================================================================
--- clang/include/clang/Analysis/Analyses/UnsafeBufferUsage.h
+++ clang/include/clang/Analysis/Analyses/UnsafeBufferUsage.h
@@ -26,8 +26,16 @@
   UnsafeBufferUsageHandler() = default;
   virtual ~UnsafeBufferUsageHandler() = default;
 
+  /// This analyses produces large fixits that are organized into lists
+  /// of primitive fixits (individual insertions/removals/replacements).
+  using FixItList = llvm::SmallVectorImpl<FixItHint>;
+
   /// Invoked when an unsafe operation over raw pointers is found.
   virtual void handleUnsafeOperation(const Stmt *Operation) = 0;
+
+  /// Invoked when a fix is suggested against a variable.
+  virtual void handleFixableVariable(const VarDecl *Variable,
+                                     FixItList &&List) = 0;
 };
 
 // This function invokes the analysis and allows the caller to react to it
_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to