Hi, I am trying to remove user-defined duplicates from search results.
For example, suppose the documents below match the query. When the query returns, I want to remove doc3 from the result, since it has the same guid as doc1. Id (uniqueKey) / guid: doc1 / G1, doc2 / G2, doc3 / G1. To do this, I generate an exclude list based on the terms of the guid field: for each term, every document from the second one onward is added to the exclude list. These docs are then added to the QueryCommand filter. Is there a better approach to handle this requirement? Below is the code change in SolrIndexSearcher.java: private TreeMap<String, BitDocSet> dupDocs = null; public QueryResult search(QueryResult qr, QueryCommand cmd) throws IOException { if (cmd.getUniqueField() != null) { DocSet filter = getDuplicateByField(cmd.getUniqueField()); if (cmd.getFilter() != null) cmd.getFilter().addAllTo(filter); cmd.setFilter(filter); } getDocListC(qr,cmd); return qr; } private synchronized BitDocSet getDuplicateByField(String field) throws IOException { if (dupDocs != null && dupDocs.containsKey(field)) { return dupDocs.get(field); } if (dupDocs == null) { dupDocs = new TreeMap<String, BitDocSet>(); } LeafReader reader = getLeafReader(); BitDocSet res = new BitDocSet(new FixedBitSet(maxDoc())); Terms terms = reader.terms(field); if (terms == null) { dupDocs.put(field, res); return res; } TermsEnum termEnum = terms.iterator(); PostingsEnum docs = null; BytesRef term = null; while ((term = termEnum.next()) != null) { docs = termEnum.postings(docs, PostingsEnum.NONE); // skip first document docs.nextDoc(); int docID = 0; while ((docID = docs.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) { res.add(docID); } } dupDocs.put(field, res); return res; } Thanks, Yongtao