This is an automated email from the ASF dual-hosted git repository.
jianliangqi pushed a commit to branch clucene
in repository https://gitbox.apache.org/repos/asf/doris-thirdparty.git
The following commit(s) were added to refs/heads/clucene by this push:
new 1b59ae81 [opt](position) add position iterator interface (#169)
1b59ae81 is described below
commit 1b59ae8184ca5f37f400e02aa4a2dd5af290e8d0
Author: zzzxl <[email protected]>
AuthorDate: Wed Jan 3 10:01:29 2024 +0800
[opt](position) add position iterator interface (#169)
---
src/core/CLucene/index/SegmentTermDocs.cpp | 4 +--
src/core/CLucene/index/_SegmentHeader.h | 4 +--
src/core/CLucene/search/query/DcoIdSetIterator.h | 16 ------------
src/core/CLucene/search/query/TermIterator.h | 29 ++++++++++++----------
.../CLucene/search/query/TermPositionIterator.h | 23 +++++++++++++++++
src/core/CMakeLists.txt | 2 +-
6 files changed, 44 insertions(+), 34 deletions(-)
diff --git a/src/core/CLucene/index/SegmentTermDocs.cpp b/src/core/CLucene/index/SegmentTermDocs.cpp
index 9108f1df..e346dc0c 100644
--- a/src/core/CLucene/index/SegmentTermDocs.cpp
+++ b/src/core/CLucene/index/SegmentTermDocs.cpp
@@ -19,7 +19,7 @@
CL_NS_DEF(index)
SegmentTermDocs::SegmentTermDocs(const SegmentReader *_parent) :
parent(_parent), freqStream(_parent->freqStream->clone()),
- count(0), df(0), deletedDocs(_parent->deletedDocs), _doc(0), _freq(0), skipInterval(_parent->tis->getSkipInterval()),
+ count(0), df(0), deletedDocs(_parent->deletedDocs), _doc(-1), _freq(0), skipInterval(_parent->tis->getSkipInterval()),
maxSkipLevels(_parent->tis->getMaxSkipLevels()), skipListReader(NULL),
freqBasePointer(0), proxBasePointer(0),
skipPointer(0), haveSkipped(false), pointer(0), pointerMax(0),
indexVersion_(_parent->_fieldInfos->getIndexVersion()),
hasProx(_parent->_fieldInfos->hasProx()), buffer_(freqStream, hasProx,
indexVersion_) {
@@ -73,7 +73,7 @@ void SegmentTermDocs::seek(const TermInfo *ti, Term *term) {
df = 0;
} else {// punt case
df = ti->docFreq;
- _doc = 0;
+ _doc = -1;
freqBasePointer = ti->freqPointer;
proxBasePointer = ti->proxPointer;
skipPointer = freqBasePointer + ti->skipOffset;
diff --git a/src/core/CLucene/index/_SegmentHeader.h b/src/core/CLucene/index/_SegmentHeader.h
index bf988a2f..c1f01e7c 100644
--- a/src/core/CLucene/index/_SegmentHeader.h
+++ b/src/core/CLucene/index/_SegmentHeader.h
@@ -93,8 +93,8 @@ protected:
int32_t count;
int32_t df;
CL_NS(util)::BitSet* deletedDocs;
- int32_t _doc;
- int32_t _freq;
+ int32_t _doc = -1;
+ int32_t _freq = 0;
int32_t docs[PFOR_BLOCK_SIZE]; // buffered doc numbers
int32_t freqs[PFOR_BLOCK_SIZE]; // buffered term freqs
int32_t pointer;
diff --git a/src/core/CLucene/search/query/DcoIdSetIterator.h b/src/core/CLucene/search/query/DcoIdSetIterator.h
deleted file mode 100644
index 88aa4313..00000000
--- a/src/core/CLucene/search/query/DcoIdSetIterator.h
+++ /dev/null
@@ -1,16 +0,0 @@
-#pragma once
-
-#include "CLucene/index/DocRange.h"
-
-class DocIdSetIterator {
-public:
- DocIdSetIterator() = default;
- virtual ~DocIdSetIterator() = default;
-
- virtual int32_t docID() = 0;
- virtual int32_t nextDoc() = 0;
- virtual int32_t advance(int32_t target) = 0;
-
- virtual int32_t docFreq() const = 0;
- virtual bool readRange(DocRange* docRange) const = 0;
-};
\ No newline at end of file
diff --git a/src/core/CLucene/search/query/TermIterator.h b/src/core/CLucene/search/query/TermIterator.h
index e0cf23a4..3eb22a25 100644
--- a/src/core/CLucene/search/query/TermIterator.h
+++ b/src/core/CLucene/search/query/TermIterator.h
@@ -1,51 +1,54 @@
#pragma once
-#include "CLucene/search/query/DcoIdSetIterator.h"
#include "CLucene/index/Terms.h"
#include <limits.h>
+#include <cstdint>
CL_NS_USE(index)
-class TermIterator : public DocIdSetIterator {
+class TermIterator {
public:
TermIterator() = default;
- TermIterator(TermDocs* termDocs) : termDocs_(termDocs) {
+ TermIterator(TermDocs* termDocs)
+ : termDocs_(termDocs) {
}
- virtual ~TermIterator() = default;
-
- bool isEmpty() {
+ inline bool isEmpty() const {
return termDocs_ == nullptr;
}
- int32_t docID() override {
- uint32_t docId = termDocs_->doc();
+ inline int32_t docID() const {
+ int32_t docId = termDocs_->doc();
return docId >= INT_MAX ? INT_MAX : docId;
}
- int32_t nextDoc() override {
+ inline int32_t freq() const {
+ return termDocs_->freq();
+ }
+
+ inline int32_t nextDoc() const {
if (termDocs_->next()) {
return termDocs_->doc();
}
return INT_MAX;
}
- int32_t advance(int32_t target) override {
+ inline int32_t advance(int32_t target) const {
if (termDocs_->skipTo(target)) {
return termDocs_->doc();
}
return INT_MAX;
}
- int32_t docFreq() const override {
+ inline int32_t docFreq() const {
return termDocs_->docFreq();
}
- bool readRange(DocRange* docRange) const override {
+ inline bool readRange(DocRange* docRange) const {
return termDocs_->readRange(docRange);
}
-private:
+protected:
TermDocs* termDocs_ = nullptr;
};
\ No newline at end of file
diff --git a/src/core/CLucene/search/query/TermPositionIterator.h b/src/core/CLucene/search/query/TermPositionIterator.h
new file mode 100644
index 00000000..d64af409
--- /dev/null
+++ b/src/core/CLucene/search/query/TermPositionIterator.h
@@ -0,0 +1,23 @@
+#pragma once
+
+#include "CLucene/search/query/TermIterator.h"
+#include "CLucene/index/Terms.h"
+
+#include <limits.h>
+
+CL_NS_USE(index)
+
+class TermPositionIterator : public TermIterator {
+public:
+ TermPositionIterator() = default;
+ TermPositionIterator(TermPositions* termPositions)
+ : TermIterator(termPositions), termPositions_(termPositions) {
+ }
+
+ inline int32_t nextPosition() const {
+ return termPositions_->nextPosition();
+ }
+
+private:
+ TermPositions* termPositions_ = nullptr;
+};
\ No newline at end of file
diff --git a/src/core/CMakeLists.txt b/src/core/CMakeLists.txt
index e1c13305..b9a09bb3 100644
--- a/src/core/CMakeLists.txt
+++ b/src/core/CMakeLists.txt
@@ -199,8 +199,8 @@ SET(clucene_core_Files
./CLucene/search/spans/SpanWeight.cpp
./CLucene/search/spans/SpanWeight.h
./CLucene/search/spans/TermSpans.cpp
- ./CLucene/search/query/DcoIdSetIterator.h
./CLucene/search/query/TermIterator.h
+ ./CLucene/search/query/TermPositionIterator.h
)
#if USE_SHARED_OBJECT_FILES then we link directly to the object files (means rebuilding them for the core)
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]