[Lldb-commits] [PATCH] D74759: Treat RangeDataVector as an augmented BST

Unnar Freyr Erlendsson via Phabricator via lldb-commits Tue, 18 Feb 2020 03:54:13 -0800

unnar created this revision.
unnar added reviewers: labath, teemperor.
Herald added subscribers: lldb-commits, JDevlieghere.
Herald added a project: LLDB.


Since RangeDataVector is assumed to always be sorted we can treat it as an 
flattened BST and augment it with additional information about the ranges 
belonging to each "subtree". By storing the maximum endpoint in every subtree 
we can query for intervals in O(log n) time.

Note: this is not a complete patch, I just wanted to put it out there and see 
how you feel about this. Also what kind of testing could and should be done for 
this.


Repository:
  rLLDB LLDB

https://reviews.llvm.org/D74759

Files:
  lldb/include/lldb/Utility/RangeMap.h

Index: lldb/include/lldb/Utility/RangeMap.h
===================================================================
--- lldb/include/lldb/Utility/RangeMap.h
+++ lldb/include/lldb/Utility/RangeMap.h
@@ -592,13 +592,17 @@
 struct RangeData : public Range<B, S> {
   typedef T DataType;
 
+  B upper_bound;
   DataType data;
 
-  RangeData() : Range<B, S>(), data() {}
+  RangeData() : Range<B, S>(), upper_bound(), data() {}
 
-  RangeData(B base, S size) : Range<B, S>(base, size), data() {}
+  RangeData(B base, S size) : Range<B, S>(base, size), upper_bound(), data() {}
 
-  RangeData(B base, S size, DataType d) : Range<B, S>(base, size), data(d) {}
+  RangeData(B base, S size, DataType d) : Range<B, S>(base, size), upper_bound(), data(d) {}
+
+  RangeData(B base, S size, B ub, DataType d) : Range<B, S>(base, size), upper_bound(ub),
+      data(d) {}
 };
 
 template <typename B, typename S, typename T, unsigned N = 0,
@@ -609,11 +613,11 @@
   typedef RangeData<B, S, T> Entry;
   typedef llvm::SmallVector<Entry, N> Collection;
 
-  RangeDataVector(Compare compare = Compare()) : m_compare(compare) {}
+  RangeDataVector(Compare compare = Compare()) : m_compare(compare), upper_bound_computed(false) {}
 
   ~RangeDataVector() = default;
 
-  void Append(const Entry &entry) { m_entries.push_back(entry); }
+  void Append(const Entry &entry) { m_entries.push_back(entry); upper_bound_computed = false; }
 
   void Sort() {
     if (m_entries.size() > 1)
@@ -627,11 +631,28 @@
                        });
   }
 
+  // We can treat the vector as a flattened BST, augmenting it with upper bounds (max of
+  // range endpoints) for every index allows us to query for intersections in O(log n) time.
+  void ComputeUpperBounds() {
+    ComputeUpperBounds(0, m_entries.size() - 1);
+    upper_bound_computed = true;
+  }
+
+  B ComputeUpperBounds(int lo, int hi) {
+    if (lo > hi) return B();
+
+    int mid = (lo + hi) / 2;
+    auto entry = m_entries[mid];
+
+    entry.upper_bound = std::max(entry.base + entry.size, std::max(ComputeUpperBounds(lo, mid - 1),
+      ComputeUpperBounds(mid + 1, hi)));
+
+    return entry.upper_bound;
+  }
+
 #ifdef ASSERT_RANGEMAP_ARE_SORTED
   bool IsSorted() const {
     typename Collection::const_iterator pos, end, prev;
-    // First we determine if we can combine any of the Entry objects so we
-    // don't end up allocating and making a new collection for no reason
     for (pos = m_entries.begin(), end = m_entries.end(), prev = end; pos != end;
          prev = pos++) {
       if (prev != end && *pos < *prev)
@@ -677,7 +698,7 @@
     }
   }
 
-  void Clear() { m_entries.clear(); }
+  void Clear() { m_entries.clear(); upper_bound_computed = false; }
 
   bool IsEmpty() const { return m_entries.empty(); }
 
@@ -708,22 +729,41 @@
   }
 
   uint32_t FindEntryIndexesThatContain(B addr,
-                                       std::vector<uint32_t> &indexes) const {
+                                       std::vector<uint32_t> &indexes) {
 #ifdef ASSERT_RANGEMAP_ARE_SORTED
     assert(IsSorted());
 #endif
-    // Search the entries until the first entry that has a larger base address
-    // than `addr`. As m_entries is sorted by their base address, all following
-    // entries can't contain `addr` as their base address is already larger.
-    for (const auto &entry : m_entries) {
-      if (entry.Contains(addr))
-        indexes.push_back(entry.data);
-      else if (entry.GetRangeBase() > addr)
-        break;
-    }
+    if (!upper_bound_computed)
+      ComputeUpperBounds();
+
+    FindEntryIndexesThatContain(addr, 0, m_entries.size(), indexes);
+
     return indexes.size();
   }
 
+  void FindEntryIndexesThatContain(B addr, int lo, int hi,
+                                   std::vector<uint32_t> &indexes) {
+    if (lo > hi) return;
+    int mid = (lo + hi) / 2;
+    auto entry = m_entries[mid];
+
+    // addr is greater than the rightmost point of any interval below mid
+    // so there are cannot be any matches.
+    if (addr > entry.upper_bound) return;
+
+    // Recursively search left subtree
+    FindEntryIndexesThatContain(addr, lo, mid - 1, indexes);
+
+    // If addr is smaller than the start of the current interval it
+    // cannot contain it nor can any of its right subtree.
+    if (addr < entry.base) return;
+
+    if (entry.Contains(addr))
+      indexes.push_back(entry.data);
+
+    FindEntryIndexesThatContain(addr, mid + 1, hi, indexes);
+  }
+
   Entry *FindEntryThatContains(B addr) {
     return const_cast<Entry *>(
         static_cast<const RangeDataVector *>(this)->FindEntryThatContains(
@@ -806,6 +846,7 @@
 protected:
   Collection m_entries;
   Compare m_compare;
+  bool upper_bound_computed;
 };
 
 // A simple range  with data class where you get to define the type of

_______________________________________________
lldb-commits mailing list
lldb-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/lldb-commits

[Lldb-commits] [PATCH] D74759: Treat RangeDataVector as an augmented BST

Reply via email to