unnar updated this revision to Diff 246679.
unnar marked 5 inline comments as done.
unnar added a comment.
In D74759#1880499 <https://reviews.llvm.org/D74759#1880499>, @labath wrote:
> Have you done a proper complexity analysis here? I doubt the `O(log n)` claim
> is true in general. It would have to be at least `O(m + log n)` (m - number
> of elements found), but it's not clear to me whether even this is true in
> general. (However, I believe this can achieve ~~log(n) for non-degenerate
> cases.)
I should have been more specific, searching for any interval that contains a
point can be done in `O(log n)` but in our case where we are searching for all
intervals that contain the point and as Jarin said we believe it takes `O(m log
n)` (I admit I have not done a thorough analysis of the time complexity
myself). The theoretical best you can do is indeed `O(m + log n)`.
> The implementation itself needs some work though. My incomplete list of
> comments is:
>
> - replace `int` with `size_t` and closed intervals with half-open ones
Done.
> - let's move the computation of the upper bound into the "Sort" function.
> sorting is O(n log(n)), this is O(n) -- we can just hide it there.
That was my first thought but I decided to have it generated lazily instead, I
will move it back to the sort.
> - make private functions private
Done.
> - we should avoid the business of figuring out what is the suitable "minimum"
> value of B by ensuring we call the recursive function on non-empty intervals
Done.
> - clang-format the patch
Done.
> For testing you should add some c++ unit tests for the relevant interfaces.
Done. Do you think we should also unit test ComputeUpperBounds or just the
interface?
CHANGES SINCE LAST ACTION
https://reviews.llvm.org/D74759/new/
https://reviews.llvm.org/D74759
Files:
lldb/include/lldb/Utility/RangeMap.h
lldb/unittests/Utility/RangeMapTest.cpp
Index: lldb/unittests/Utility/RangeMapTest.cpp
===================================================================
--- lldb/unittests/Utility/RangeMapTest.cpp
+++ lldb/unittests/Utility/RangeMapTest.cpp
@@ -19,6 +19,13 @@
return testing::Pointee(testing::Field(&EntryT::data, ID));
}
+const std::vector<uint32_t> FindEntryIndexes(uint32_t address,
+ RangeDataVectorT map) {
+ std::vector<uint32_t> result;
+ map.FindEntryIndexesThatContain(address, result);
+ return result;
+}
+
TEST(RangeDataVector, FindEntryThatContains) {
RangeDataVectorT Map;
uint32_t NextID = 0;
@@ -94,3 +101,37 @@
EXPECT_THAT(MapC.GetEntryRef(2).data, 51);
EXPECT_THAT(MapC.GetEntryRef(3).data, 50);
}
+
+TEST(RangeDataVector, FindEntryIndexesThatContain) {
+ RangeDataVectorT Map;
+ Map.Append(EntryT(0, 10, 10));
+ Map.Append(EntryT(10, 10, 11));
+ Map.Append(EntryT(20, 10, 12));
+ Map.Sort();
+
+ EXPECT_THAT(FindEntryIndexes(0, Map), testing::ElementsAre(10));
+ EXPECT_THAT(FindEntryIndexes(9, Map), testing::ElementsAre(10));
+ EXPECT_THAT(FindEntryIndexes(10, Map), testing::ElementsAre(11));
+ EXPECT_THAT(FindEntryIndexes(19, Map), testing::ElementsAre(11));
+ EXPECT_THAT(FindEntryIndexes(20, Map), testing::ElementsAre(12));
+ EXPECT_THAT(FindEntryIndexes(29, Map), testing::ElementsAre(12));
+ EXPECT_THAT(FindEntryIndexes(30, Map), testing::ElementsAre());
+}
+
+TEST(RangeDataVector, FindEntryIndexesThatContain_Overlap) {
+ RangeDataVectorT Map;
+ Map.Append(EntryT(0, 40, 10));
+ Map.Append(EntryT(10, 20, 11));
+ Map.Append(EntryT(20, 10, 12));
+ Map.Sort();
+
+ EXPECT_THAT(FindEntryIndexes(0, Map), testing::ElementsAre(10));
+ EXPECT_THAT(FindEntryIndexes(9, Map), testing::ElementsAre(10));
+ EXPECT_THAT(FindEntryIndexes(10, Map), testing::ElementsAre(10, 11));
+ EXPECT_THAT(FindEntryIndexes(19, Map), testing::ElementsAre(10, 11));
+ EXPECT_THAT(FindEntryIndexes(20, Map), testing::ElementsAre(10, 11, 12));
+ EXPECT_THAT(FindEntryIndexes(29, Map), testing::ElementsAre(10, 11, 12));
+ EXPECT_THAT(FindEntryIndexes(30, Map), testing::ElementsAre(10));
+ EXPECT_THAT(FindEntryIndexes(39, Map), testing::ElementsAre(10));
+ EXPECT_THAT(FindEntryIndexes(40, Map), testing::ElementsAre());
+}
Index: lldb/include/lldb/Utility/RangeMap.h
===================================================================
--- lldb/include/lldb/Utility/RangeMap.h
+++ lldb/include/lldb/Utility/RangeMap.h
@@ -592,13 +592,18 @@
struct RangeData : public Range<B, S> {
typedef T DataType;
+ B upper_bound;
DataType data;
- RangeData() : Range<B, S>(), data() {}
+ RangeData() : Range<B, S>(), upper_bound(), data() {}
- RangeData(B base, S size) : Range<B, S>(base, size), data() {}
+ RangeData(B base, S size) : Range<B, S>(base, size), upper_bound(), data() {}
- RangeData(B base, S size, DataType d) : Range<B, S>(base, size), data(d) {}
+ RangeData(B base, S size, DataType d)
+ : Range<B, S>(base, size), upper_bound(), data(d) {}
+
+ RangeData(B base, S size, B ub, DataType d)
+ : Range<B, S>(base, size), upper_bound(ub), data(d) {}
};
template <typename B, typename S, typename T, unsigned N = 0,
@@ -625,13 +630,13 @@
return a.size < b.size;
return compare(a.data, b.data);
});
+ if (!m_entries.empty())
+ ComputeUpperBounds(0, m_entries.size());
}
#ifdef ASSERT_RANGEMAP_ARE_SORTED
bool IsSorted() const {
typename Collection::const_iterator pos, end, prev;
- // First we determine if we can combine any of the Entry objects so we
- // don't end up allocating and making a new collection for no reason
for (pos = m_entries.begin(), end = m_entries.end(), prev = end; pos != end;
prev = pos++) {
if (prev != end && *pos < *prev)
@@ -707,20 +712,13 @@
return UINT32_MAX;
}
- uint32_t FindEntryIndexesThatContain(B addr,
- std::vector<uint32_t> &indexes) const {
+ uint32_t FindEntryIndexesThatContain(B addr, std::vector<uint32_t> &indexes) {
#ifdef ASSERT_RANGEMAP_ARE_SORTED
assert(IsSorted());
#endif
- // Search the entries until the first entry that has a larger base address
- // than `addr`. As m_entries is sorted by their base address, all following
- // entries can't contain `addr` as their base address is already larger.
- for (const auto &entry : m_entries) {
- if (entry.Contains(addr))
- indexes.push_back(entry.data);
- else if (entry.GetRangeBase() > addr)
- break;
- }
+ if (!m_entries.empty())
+ FindEntryIndexesThatContain(addr, 0, m_entries.size(), indexes);
+
return indexes.size();
}
@@ -806,6 +804,54 @@
protected:
Collection m_entries;
Compare m_compare;
+
+private:
+ // We can treat the vector as a flattened Binary Search Tree, augmenting it
+ // with upper bounds (max of range endpoints) for every index allows us to
+ // query for range containment faster.
+ B ComputeUpperBounds(size_t lo, size_t hi) {
+ size_t mid = (lo + hi) / 2;
+ auto &entry = m_entries[mid];
+
+ entry.upper_bound = entry.base + entry.size;
+
+ if (lo < mid)
+ entry.upper_bound =
+ std::max(entry.upper_bound, ComputeUpperBounds(lo, mid));
+
+ if (mid + 1 < hi)
+ entry.upper_bound =
+ std::max(entry.upper_bound, ComputeUpperBounds(mid + 1, hi));
+
+ return entry.upper_bound;
+ }
+
+ void FindEntryIndexesThatContain(B addr, size_t lo, size_t hi,
+ std::vector<uint32_t> &indexes) {
+ size_t mid = (lo + hi) / 2;
+ auto entry = m_entries[mid];
+
+ // addr is greater than the rightmost point of any interval below mid
+ // so there are cannot be any matches.
+ if (addr > entry.upper_bound)
+ return;
+
+ // Recursively search left subtree
+ if (lo < mid)
+ FindEntryIndexesThatContain(addr, lo, mid, indexes);
+
+ // If addr is smaller than the start of the current interval it
+ // cannot contain it nor can any of its right subtree.
+ if (addr < entry.base)
+ return;
+
+ if (entry.Contains(addr))
+ indexes.push_back(entry.data);
+
+ // Recursively search right subtree
+ if (mid + 1 < hi)
+ FindEntryIndexesThatContain(addr, mid + 1, hi, indexes);
+ }
};
// A simple range with data class where you get to define the type of
_______________________________________________
lldb-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/lldb-commits