kbobyrev created this revision. kbobyrev added reviewers: ioeric, ilya-biryukov. kbobyrev added a project: clang-tools-extra. Herald added subscribers: kadircet, arphaman, jkorous, MaskRay.
https://reviews.llvm.org/D50970 Files: clang-tools-extra/clangd/index/dex/Iterator.cpp clang-tools-extra/clangd/index/dex/Iterator.h clang-tools-extra/unittests/clangd/DexIndexTests.cpp
Index: clang-tools-extra/unittests/clangd/DexIndexTests.cpp =================================================================== --- clang-tools-extra/unittests/clangd/DexIndexTests.cpp +++ clang-tools-extra/unittests/clangd/DexIndexTests.cpp @@ -175,26 +175,29 @@ // beneficial to implement automatic generation of query trees for more // comprehensive testing. TEST(DexIndexIterators, QueryTree) { - // An example of more complicated query // // +-----------------+ // |And Iterator:1, 5| // +--------+--------+ // | // | - // +------------------------------------+ + // +-------------+----------------------+ // | | // | | // +----------v----------+ +----------v---------+ // |And Iterator: 1, 5, 9| |Or Iterator: 0, 1, 5| // +----------+----------+ +----------+---------+ // | | - // +------+-----+ +---------+-----------+ + // +------+-----+ +---------------------+ // | | | | | - // +-------v-----+ +----v-----+ +--v--+ +-V--+ +---v---+ - // |1, 3, 5, 8, 9| |1, 5, 7, 9| |Empty| |0, 5| |0, 1, 5| - // +-------------+ +----------+ +-----+ +----+ +-------+ - + // +-------v-----+ +----+---+ +--v--+ +---v----+ +----v---+ + // |1, 3, 5, 8, 9| |Boost: 2| |Empty| |Boost: 3| |Boost: 4| + // +-------------+ +----+---+ +-----+ +---+----+ +----+---+ + // | | | + // +----v-----+ +-v--+ +---v---+ + // |1, 5, 7, 9| |0, 5| |0, 1, 5| + // +----------+ +----+ +-------+ + // const PostingList L0 = {1, 3, 5, 8, 9}; const PostingList L1 = {1, 5, 7, 9}; const PostingList L2 = {0, 5}; @@ -204,21 +207,26 @@ // Root of the query tree: [1, 5] auto Root = createAnd( // Lower And Iterator: [1, 5, 9] - createAnd(create(L0), create(L1)), + createAnd(create(L0), createBoost(create(L1), 2U)), // Lower Or Iterator: [0, 1, 5] - createOr(create(L2), create(L3), create(L4))); + createOr(create(L2), createBoost(create(L3), 3U), + createBoost(create(L4), 4U))); EXPECT_FALSE(Root->reachedEnd()); EXPECT_EQ(Root->peek(), 1U); Root->advanceTo(0); // Advance multiple times. Shouldn't do anything. Root->advanceTo(1); Root->advanceTo(0); EXPECT_EQ(Root->peek(), 1U); + auto ElementBoost = Root->boost(Root->peek()); + EXPECT_THAT(ElementBoost, 6); Root->advance(); EXPECT_EQ(Root->peek(), 5U); Root->advanceTo(5); EXPECT_EQ(Root->peek(), 5U); + ElementBoost = Root->boost(Root->peek()); + EXPECT_THAT(ElementBoost, 8); Root->advanceTo(9000); EXPECT_TRUE(Root->reachedEnd()); } @@ -275,6 +283,34 @@ EXPECT_THAT(consume(*AndIterator), ElementsAre(1, 2, 5)); } +TEST(DexIndexIterators, Boost) { + auto BoostIterator = createBoost(createTrue(5U), 42U); + EXPECT_FALSE(BoostIterator->reachedEnd()); + auto ElementBoost = BoostIterator->boost(BoostIterator->peek()); + EXPECT_THAT(ElementBoost, 42U); + + const PostingList L0 = {2, 4}; + const PostingList L1 = {1, 4}; + auto Root = createOr(createTrue(5U), createBoost(create(L0), 2U), + createBoost(create(L1), 3U)); + + ElementBoost = Root->boost(Root->peek()); + EXPECT_THAT(ElementBoost, Iterator::DEFAULT_BOOST_SCORE); + Root->advance(); + EXPECT_THAT(Root->peek(), 1U); + ElementBoost = Root->boost(Root->peek()); + EXPECT_THAT(ElementBoost, 3); + + Root->advance(); + EXPECT_THAT(Root->peek(), 2U); + ElementBoost = Root->boost(Root->peek()); + EXPECT_THAT(ElementBoost, 2); + + Root->advanceTo(4); + ElementBoost = Root->boost(Root->peek()); + EXPECT_THAT(ElementBoost, 3); +} + testing::Matcher<std::vector<Token>> trigramsAre(std::initializer_list<std::string> Trigrams) { std::vector<Token> Tokens; Index: clang-tools-extra/clangd/index/dex/Iterator.h =================================================================== --- clang-tools-extra/clangd/index/dex/Iterator.h +++ clang-tools-extra/clangd/index/dex/Iterator.h @@ -89,6 +89,8 @@ /// /// Note: reachedEnd() must be false. virtual DocID peek() const = 0; + // FIXME(kbobyrev): Document this one. + virtual unsigned boost(DocID ID) const = 0; virtual ~Iterator() {} @@ -107,6 +109,8 @@ return Iterator.dump(OS); } + const static unsigned DEFAULT_BOOST_SCORE; + private: virtual llvm::raw_ostream &dump(llvm::raw_ostream &OS) const = 0; }; @@ -116,6 +120,11 @@ std::vector<DocID> consume(Iterator &It, size_t Limit = std::numeric_limits<size_t>::max()); +// +std::vector<std::pair<DocID, unsigned>> +consumeAndBoost(Iterator &It, + size_t Limit = std::numeric_limits<size_t>::max()); + /// Returns a document iterator over given PostingList. std::unique_ptr<Iterator> create(PostingListRef Documents); @@ -133,6 +142,11 @@ /// all items in range [0, Size) in an efficient manner. std::unique_ptr<Iterator> createTrue(DocID Size); +/// Returns BOOST iterator which multiplies the score of each item by +/// given factor. +std::unique_ptr<Iterator> createBoost(std::unique_ptr<Iterator> Child, + unsigned Factor); + /// This allows createAnd(create(...), create(...)) syntax. template <typename... Args> std::unique_ptr<Iterator> createAnd(Args... args) { std::vector<std::unique_ptr<Iterator>> Children; Index: clang-tools-extra/clangd/index/dex/Iterator.cpp =================================================================== --- clang-tools-extra/clangd/index/dex/Iterator.cpp +++ clang-tools-extra/clangd/index/dex/Iterator.cpp @@ -16,6 +16,8 @@ namespace clangd { namespace dex { +unsigned const Iterator::DEFAULT_BOOST_SCORE = 1U; + namespace { /// Implements Iterator over a PostingList. DocumentIterator is the most basic @@ -46,6 +48,8 @@ return *Index; } + unsigned boost(DocID ID) const override { return DEFAULT_BOOST_SCORE; } + private: llvm::raw_ostream &dump(llvm::raw_ostream &OS) const override { OS << '['; @@ -103,6 +107,16 @@ DocID peek() const override { return Children.front()->peek(); } + unsigned boost(DocID ID) const override { + if (peek() != ID) + return DEFAULT_BOOST_SCORE; + return std::accumulate( + begin(Children), end(Children), DEFAULT_BOOST_SCORE, + [&](unsigned Current, const std::unique_ptr<Iterator> &Child) { + return Current * Child->boost(ID); + }); + } + private: llvm::raw_ostream &dump(llvm::raw_ostream &OS) const override { OS << "(& "; @@ -209,6 +223,16 @@ return Result; } + unsigned boost(DocID ID) const override { + if (peek() != ID) + return DEFAULT_BOOST_SCORE; + return std::accumulate( + begin(Children), end(Children), DEFAULT_BOOST_SCORE, + [&](unsigned Current, const std::unique_ptr<Iterator> &Child) { + return std::max(Current, Child->boost(ID)); + }); + } + private: llvm::raw_ostream &dump(llvm::raw_ostream &OS) const override { OS << "(| "; @@ -249,6 +273,8 @@ return Index; } + unsigned boost(DocID) const override { return DEFAULT_BOOST_SCORE; } + private: llvm::raw_ostream &dump(llvm::raw_ostream &OS) const override { OS << "(TRUE {" << Index << "} out of " << Size << ")"; @@ -260,16 +286,57 @@ DocID Size; }; +// FIXME(kbobryev): Add documentation. +class BoostIterator : public Iterator { +public: + BoostIterator(std::unique_ptr<Iterator> Child, unsigned Factor) + : Child(move(Child)), Factor(Factor) {} + + bool reachedEnd() const override { return Child->reachedEnd(); } + + void advance() override { Child->advance(); } + + void advanceTo(DocID ID) override { Child->advanceTo(ID); } + + DocID peek() const override { return Child->peek(); } + + unsigned boost(DocID ID) const override { + return (reachedEnd() || peek() != ID) ? DEFAULT_BOOST_SCORE + : Child->boost(ID) * Factor; + } + +private: + llvm::raw_ostream &dump(llvm::raw_ostream &OS) const override { + OS << "(BOOST " << Factor << ' ' << *Child << ')'; + return OS; + } + + std::unique_ptr<Iterator> Child; + unsigned Factor; +}; + } // end namespace +// FIXME(kbobyrev): Update DexIndex and include two-stage retrieval. std::vector<DocID> consume(Iterator &It, size_t Limit) { std::vector<DocID> Result; for (size_t Retrieved = 0; !It.reachedEnd() && Retrieved < Limit; It.advance(), ++Retrieved) Result.push_back(It.peek()); return Result; } +std::vector<std::pair<DocID, unsigned>> consumeAndBoost(Iterator &It, + size_t Limit) { + std::vector<std::pair<DocID, unsigned>> Result; + for (size_t Retrieved = 0; !It.reachedEnd() && Retrieved < Limit; + It.advance(), ++Retrieved) { + DocID Document = It.peek(); + Result.push_back(std::make_pair(Document, It.boost(Document))); + } + return Result; +} + std::unique_ptr<Iterator> create(PostingListRef Documents) { return llvm::make_unique<DocumentIterator>(Documents); } @@ -288,6 +355,11 @@ return llvm::make_unique<TrueIterator>(Size); } +std::unique_ptr<Iterator> createBoost(std::unique_ptr<Iterator> Child, + unsigned Factor) { + return llvm::make_unique<BoostIterator>(move(Child), Factor); +} + } // namespace dex } // namespace clangd } // namespace clang
_______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits