Hi, I need help porting my Lucene code from version 4 to version 5. In particular, I need to customize a collector (to collect all doc IDs in the index, which can contain more than 30 million docs). Below is how I achieved this in Lucene 4. Are there any guidelines on how to do this in Lucene 5, especially regarding the semantic changes between AtomicReaderContext (which seems to be deprecated) and the new LeafReaderContext?
Thank you in advance.

/**
 * Collector that gathers the distinct values of the "title" field for every
 * document passed to {@link #collect(int)}.
 *
 * <p>Written against the Lucene 4.x {@code Collector} API: the searcher calls
 * {@link #setNextReader(AtomicReaderContext)} once per index segment and then
 * {@link #collect(int)} for each matching document of that segment.
 *
 * <p>The instance keeps mutable state and performs no synchronization.
 */
public class CustomCollector extends Collector {

    /** Distinct "title" strings collected so far. */
    private final HashSet<String> data = new HashSet<String>();

    /** Scorer handed over by the searcher; stored but not consulted by this collector. */
    private Scorer scorer;

    /** Doc-id offset of the current segment; stored but not consulted by this collector. */
    private int docBase;

    /** "title" values of the current segment, loaded in {@link #setNextReader}. */
    private BinaryDocValues dataList;

    /**
     * Tells the searcher that documents may be delivered in any order.
     *
     * @return always {@code true}
     */
    @Override
    public boolean acceptsDocsOutOfOrder() {
        return true;
    }

    /**
     * Remembers the scorer for the current segment.
     *
     * @param scorer scorer supplied by the searcher
     */
    @Override
    public void setScorer(Scorer scorer) {
        this.scorer = scorer;
    }

    /**
     * Switches to a new segment: records its doc base and loads the segment's
     * "title" values through the field cache.
     *
     * @param ctx context of the segment about to be collected
     * @throws IOException if the field cache cannot read the segment
     */
    @Override
    public void setNextReader(AtomicReaderContext ctx) throws IOException {
        this.docBase = ctx.docBase;
        dataList = FieldCache.DEFAULT.getTerms(ctx.reader(), "title", false);
    }

    /**
     * Looks up the "title" value of one document and adds it to the result set,
     * unless the value is the shared empty-bytes sentinel.
     *
     * @param doc document id, used as the index into the values loaded by the
     *            most recent {@link #setNextReader} call
     * @throws IOException declared by the {@code Collector} contract
     */
    @Override
    public void collect(int doc) throws IOException {
        BytesRef t = new BytesRef();
        // The original line read `dataList(doc);`, which is not a valid call and
        // never filled `t`. Restored as the two-argument lookup that writes into
        // the pre-allocated BytesRef.
        // NOTE(review): on Lucene versions where BinaryDocValues.get(int) returns
        // the BytesRef, use `t = dataList.get(doc)` instead -- confirm against
        // the Lucene 4.x release in use.
        dataList.get(doc, t);
        // NOTE(review): identity check kept from the original (it was written
        // twice there); verify that the Lucene release in use signals a missing
        // value with BytesRef.EMPTY_BYTES.
        if (t.bytes != BytesRef.EMPTY_BYTES) {
            data.add(t.utf8ToString());
        }
    }

    /**
     * Discards everything collected so far and drops the per-segment values.
     * {@link #setNextReader} must be called again before further collecting.
     */
    public void reset() {
        data.clear();
        dataList = null;
    }

    /**
     * Returns the collected values.
     *
     * @return the live internal set (not a copy) of distinct "title" strings
     */
    public HashSet<String> getData() {
        return data;
    }
}