diegoceccarelli commented on a change in pull request #357: [SOLR-12238] Synonym Queries boost by payload
URL: https://github.com/apache/lucene-solr/pull/357#discussion_r370941548
########## File path: lucene/core/src/java/org/apache/lucene/util/QueryBuilder.java ########## @@ -349,25 +361,78 @@ protected Query createFieldQuery(TokenStream source, BooleanClause.Occur operato * returned. When multiple tokens, an ordered <code>SpanNearQuery</code> with slop 0 is returned. */ protected SpanQuery createSpanQuery(TokenStream in, String field) throws IOException { + PayloadAttribute payloadAttribute = null; + if(synonymsBoostByPayload){ + payloadAttribute = in.getAttribute(PayloadAttribute.class); + } TermToBytesRefAttribute termAtt = in.getAttribute(TermToBytesRefAttribute.class); if (termAtt == null) { return null; } List<SpanTermQuery> terms = new ArrayList<>(); + List<BytesRef> payloads = new ArrayList<>(); while (in.incrementToken()) { terms.add(new SpanTermQuery(new Term(field, termAtt.getBytesRef()))); + if(payloadAttribute!=null){ + payloads.add(payloadAttribute.getPayload()); + } + } + in.end(); + in.close(); + + BytesRef[] queryPayloadsArray = payloads.toArray(new BytesRef[payloads.size()]); + float queryPayloadBoost = 0; + if (!payloads.isEmpty()) { + queryPayloadBoost = extractQueryPayload(queryPayloadsArray); } if (terms.isEmpty()) { return null; } else if (terms.size() == 1) { - return terms.get(0); + SpanTermQuery singleTermQuery = terms.get(0); + if (queryPayloadBoost != 0) { + return new SpanBoostQuery(singleTermQuery, queryPayloadBoost); + } else { + return singleTermQuery; + } } else { - return new SpanNearQuery(terms.toArray(new SpanTermQuery[0]), 0, true); + SpanNearQuery multiTermQuery = new SpanNearQuery(terms.toArray(new SpanTermQuery[0]), 0, true); + if (queryPayloadBoost != 0) { + return new SpanBoostQuery(multiTermQuery, queryPayloadBoost); + } else { + return multiTermQuery; + } + } + } + + /*Current assumption is that the user will associate a single payload to the multi terms synonym + * that generated the phrase query, so a valid value for the payload associated to the query is just the first not null payload + * 
e.g. + * lion => panthera leo|0.99 + * "panthera leo" query will have associated Payloads [null,0.99] + * So the payload associated to the query will be 0.99 which is the first not null + * */ + protected float extractQueryPayload(BytesRef[] payloadsForQueryTerms) { + for (BytesRef singlePayload : payloadsForQueryTerms) { + if (singlePayload != null) { + float decodedPayload = decodeFloat(singlePayload.bytes, singlePayload.offset); + return decodedPayload; + } } + return 0; } + public static final float decodeFloat(byte [] bytes, int offset){ Review comment: public intended? ---------------------------------------------------------------- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org With regards, Apache Git Services --------------------------------------------------------------------- To unsubscribe, e-mail: issues-unsubscr...@lucene.apache.org For additional commands, e-mail: issues-h...@lucene.apache.org