unnar updated this revision to Diff 246679.
unnar marked 5 inline comments as done.
unnar added a comment.

In D74759#1880499 <https://reviews.llvm.org/D74759#1880499>, @labath wrote:

> Have you done a proper complexity analysis here? I doubt the `O(log n)` claim 
> is true in general. It would have to be at least `O(m + log n)` (m - number 
> of elements found), but it's not clear to me whether even this is true in 
> general. (However, I believe this can achieve ~~log(n) for non-degenerate 
> cases.)


I should have been more specific, searching for any interval that contains a 
point can be done in `O(log n)` but in our case where we are searching for all 
intervals that contain the point and as Jarin said we believe it takes `O(m log 
n)` (I admit I have not done a thorough analysis of the time complexity 
myself). The theoretical best you can do is indeed `O(m + log n)`.

> The implementation itself needs some work though. My incomplete list of 
> comments is:
> 
> - replace `int` with `size_t` and closed intervals with half-open ones

Done.

> - let's move the computation of the upper bound into the "Sort" function. 
> sorting is O(n log(n)), this is O(n) -- we can just hide it there.

That was my first thought but I decided to have it generated lazily instead, I 
will move it back to the sort.

> - make private functions private

Done.

> - we should avoid the business of figuring out what is the suitable "minimum" 
> value of B by ensuring we call the recursive function on non-empty intervals

Done.

> - clang-format the patch

Done.

> For testing you should add some c++ unit tests for the relevant interfaces.

Done. Do you think we should also unit test ComputeUpperBounds or just the 
interface?


CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D74759/new/

https://reviews.llvm.org/D74759

Files:
  lldb/include/lldb/Utility/RangeMap.h
  lldb/unittests/Utility/RangeMapTest.cpp

Index: lldb/unittests/Utility/RangeMapTest.cpp
===================================================================
--- lldb/unittests/Utility/RangeMapTest.cpp
+++ lldb/unittests/Utility/RangeMapTest.cpp
@@ -19,6 +19,13 @@
   return testing::Pointee(testing::Field(&EntryT::data, ID));
 }
 
+const std::vector<uint32_t> FindEntryIndexes(uint32_t address,
+                                             RangeDataVectorT map) {
+  std::vector<uint32_t> result;
+  map.FindEntryIndexesThatContain(address, result);
+  return result;
+}
+
 TEST(RangeDataVector, FindEntryThatContains) {
   RangeDataVectorT Map;
   uint32_t NextID = 0;
@@ -94,3 +101,37 @@
   EXPECT_THAT(MapC.GetEntryRef(2).data, 51);
   EXPECT_THAT(MapC.GetEntryRef(3).data, 50);
 }
+
+TEST(RangeDataVector, FindEntryIndexesThatContain) {
+  RangeDataVectorT Map;
+  Map.Append(EntryT(0, 10, 10));
+  Map.Append(EntryT(10, 10, 11));
+  Map.Append(EntryT(20, 10, 12));
+  Map.Sort();
+
+  EXPECT_THAT(FindEntryIndexes(0, Map), testing::ElementsAre(10));
+  EXPECT_THAT(FindEntryIndexes(9, Map), testing::ElementsAre(10));
+  EXPECT_THAT(FindEntryIndexes(10, Map), testing::ElementsAre(11));
+  EXPECT_THAT(FindEntryIndexes(19, Map), testing::ElementsAre(11));
+  EXPECT_THAT(FindEntryIndexes(20, Map), testing::ElementsAre(12));
+  EXPECT_THAT(FindEntryIndexes(29, Map), testing::ElementsAre(12));
+  EXPECT_THAT(FindEntryIndexes(30, Map), testing::ElementsAre());
+}
+
+TEST(RangeDataVector, FindEntryIndexesThatContain_Overlap) {
+  RangeDataVectorT Map;
+  Map.Append(EntryT(0, 40, 10));
+  Map.Append(EntryT(10, 20, 11));
+  Map.Append(EntryT(20, 10, 12));
+  Map.Sort();
+
+  EXPECT_THAT(FindEntryIndexes(0, Map), testing::ElementsAre(10));
+  EXPECT_THAT(FindEntryIndexes(9, Map), testing::ElementsAre(10));
+  EXPECT_THAT(FindEntryIndexes(10, Map), testing::ElementsAre(10, 11));
+  EXPECT_THAT(FindEntryIndexes(19, Map), testing::ElementsAre(10, 11));
+  EXPECT_THAT(FindEntryIndexes(20, Map), testing::ElementsAre(10, 11, 12));
+  EXPECT_THAT(FindEntryIndexes(29, Map), testing::ElementsAre(10, 11, 12));
+  EXPECT_THAT(FindEntryIndexes(30, Map), testing::ElementsAre(10));
+  EXPECT_THAT(FindEntryIndexes(39, Map), testing::ElementsAre(10));
+  EXPECT_THAT(FindEntryIndexes(40, Map), testing::ElementsAre());
+}
Index: lldb/include/lldb/Utility/RangeMap.h
===================================================================
--- lldb/include/lldb/Utility/RangeMap.h
+++ lldb/include/lldb/Utility/RangeMap.h
@@ -592,13 +592,18 @@
 struct RangeData : public Range<B, S> {
   typedef T DataType;
 
+  B upper_bound;
   DataType data;
 
-  RangeData() : Range<B, S>(), data() {}
+  RangeData() : Range<B, S>(), upper_bound(), data() {}
 
-  RangeData(B base, S size) : Range<B, S>(base, size), data() {}
+  RangeData(B base, S size) : Range<B, S>(base, size), upper_bound(), data() {}
 
-  RangeData(B base, S size, DataType d) : Range<B, S>(base, size), data(d) {}
+  RangeData(B base, S size, DataType d)
+      : Range<B, S>(base, size), upper_bound(), data(d) {}
+
+  RangeData(B base, S size, B ub, DataType d)
+      : Range<B, S>(base, size), upper_bound(ub), data(d) {}
 };
 
 template <typename B, typename S, typename T, unsigned N = 0,
@@ -625,13 +630,13 @@
                            return a.size < b.size;
                          return compare(a.data, b.data);
                        });
+    if (!m_entries.empty())
+      ComputeUpperBounds(0, m_entries.size());
   }
 
 #ifdef ASSERT_RANGEMAP_ARE_SORTED
   bool IsSorted() const {
     typename Collection::const_iterator pos, end, prev;
-    // First we determine if we can combine any of the Entry objects so we
-    // don't end up allocating and making a new collection for no reason
     for (pos = m_entries.begin(), end = m_entries.end(), prev = end; pos != end;
          prev = pos++) {
       if (prev != end && *pos < *prev)
@@ -707,20 +712,13 @@
     return UINT32_MAX;
   }
 
-  uint32_t FindEntryIndexesThatContain(B addr,
-                                       std::vector<uint32_t> &indexes) const {
+  uint32_t FindEntryIndexesThatContain(B addr, std::vector<uint32_t> &indexes) {
 #ifdef ASSERT_RANGEMAP_ARE_SORTED
     assert(IsSorted());
 #endif
-    // Search the entries until the first entry that has a larger base address
-    // than `addr`. As m_entries is sorted by their base address, all following
-    // entries can't contain `addr` as their base address is already larger.
-    for (const auto &entry : m_entries) {
-      if (entry.Contains(addr))
-        indexes.push_back(entry.data);
-      else if (entry.GetRangeBase() > addr)
-        break;
-    }
+    if (!m_entries.empty())
+      FindEntryIndexesThatContain(addr, 0, m_entries.size(), indexes);
+
     return indexes.size();
   }
 
@@ -806,6 +804,54 @@
 protected:
   Collection m_entries;
   Compare m_compare;
+
+private:
+  // We can treat the vector as a flattened Binary Search Tree, augmenting it
+  // with upper bounds (max of range endpoints) for every index allows us to
+  // query for range containment faster.
+  B ComputeUpperBounds(size_t lo, size_t hi) {
+    size_t mid = (lo + hi) / 2;
+    auto &entry = m_entries[mid];
+
+    entry.upper_bound = entry.base + entry.size;
+
+    if (lo < mid)
+      entry.upper_bound =
+          std::max(entry.upper_bound, ComputeUpperBounds(lo, mid));
+
+    if (mid + 1 < hi)
+      entry.upper_bound =
+          std::max(entry.upper_bound, ComputeUpperBounds(mid + 1, hi));
+
+    return entry.upper_bound;
+  }
+
+  void FindEntryIndexesThatContain(B addr, size_t lo, size_t hi,
+                                   std::vector<uint32_t> &indexes) {
+    size_t mid = (lo + hi) / 2;
+    auto entry = m_entries[mid];
+
+    // addr is greater than the rightmost point of any interval below mid
+    // so there are cannot be any matches.
+    if (addr > entry.upper_bound)
+      return;
+
+    // Recursively search left subtree
+    if (lo < mid)
+      FindEntryIndexesThatContain(addr, lo, mid, indexes);
+
+    // If addr is smaller than the start of the current interval it
+    // cannot contain it nor can any of its right subtree.
+    if (addr < entry.base)
+      return;
+
+    if (entry.Contains(addr))
+      indexes.push_back(entry.data);
+
+    // Recursively search right subtree
+    if (mid + 1 < hi)
+      FindEntryIndexesThatContain(addr, mid + 1, hi, indexes);
+  }
 };
 
 // A simple range  with data class where you get to define the type of
_______________________________________________
lldb-commits mailing list
lldb-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/lldb-commits

Reply via email to