vthacker commented on a change in pull request #1620: URL: https://github.com/apache/lucene-solr/pull/1620#discussion_r447419709
########## File path: solr/core/src/test/org/apache/solr/schema/RankFieldTest.java ########## @@ -0,0 +1,261 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.solr.schema; + +import java.io.IOException; +import java.nio.charset.StandardCharsets; + +import org.apache.lucene.index.LeafReader; +import org.apache.lucene.util.BytesRef; +import org.apache.solr.SolrTestCaseJ4; +import org.junit.BeforeClass; +import org.junit.Ignore; + +public class RankFieldTest extends SolrTestCaseJ4 { + + private static final String RANK_1 = "rank_1"; + private static final String RANK_2 = "rank_2"; + + @BeforeClass + public static void beforeClass() throws Exception { + initCore("solrconfig-minimal.xml","schema-rank-fields.xml"); + } + + @Override + public void setUp() throws Exception { + clearIndex(); + assertU(commit()); + super.setUp(); + } + + public void testInternalFieldName() { + assertEquals("RankField.INTERNAL_RANK_FIELD_NAME changed in an incompatible way", + "_rank_", RankField.INTERNAL_RANK_FIELD_NAME); + } + + public void testBasic() { + assertNotNull(h.getCore().getLatestSchema().getFieldOrNull(RANK_1)); + assertEquals(RankField.class, h.getCore().getLatestSchema().getField(RANK_1).getType().getClass()); + } + + public void testBadFormat() { + ignoreException("Expecting float"); + assertFailedU(adoc( + "id", "1", + RANK_1, "foo" + )); + + assertFailedU(adoc( + "id", "1", + RANK_1, "1.2.3" + )); + + unIgnoreException("Expecting float"); + + ignoreException("must be finite"); + assertFailedU(adoc( + "id", "1", + RANK_1, Float.toString(Float.POSITIVE_INFINITY) + )); + + assertFailedU(adoc( + "id", "1", + RANK_1, Float.toString(Float.NEGATIVE_INFINITY) + )); + + assertFailedU(adoc( + "id", "1", + RANK_1, Float.toString(Float.NaN) + )); + + unIgnoreException("must be finite"); + + ignoreException("must be a positive"); + assertFailedU(adoc( + "id", "1", + RANK_1, Float.toString(-0.0f) + )); + + assertFailedU(adoc( + "id", "1", + RANK_1, Float.toString(-1f) + )); + + assertFailedU(adoc( + "id", "1", + RANK_1, Float.toString(0.0f) + )); + unIgnoreException("must be a positive"); + } + + public void testAddRandom() { + for (int i = 0 ; i < random().nextInt(TEST_NIGHTLY ? 10000 : 100); i++) { + assertU(adoc( + "id", String.valueOf(i), + RANK_1, Float.toString(random().nextFloat()) + )); + } + assertU(commit()); + } + + public void testSkipEmpty() { + assertU(adoc( + "id", "1", + RANK_1, "" + )); + } + + public void testBasicAdd() throws IOException { + assertU(adoc( + "id", "testBasicAdd", + RANK_1, "1" + )); + assertU(commit()); + //assert that the document made it in + assertQ(req("q", "id:testBasicAdd"), "//*[@numFound='1']"); + h.getCore().withSearcher((searcher) -> { + LeafReader reader = searcher.getIndexReader().getContext().leaves().get(0).reader(); + // assert that the field made it in + assertNotNull(reader.getFieldInfos().fieldInfo(RankField.INTERNAL_RANK_FIELD_NAME)); + // assert that the feature made it in + assertTrue(reader.terms(RankField.INTERNAL_RANK_FIELD_NAME).iterator().seekExact(new BytesRef(RANK_1.getBytes(StandardCharsets.UTF_8)))); + return null; + }); + } + + public void testMultipleRankFields() throws IOException { + assertU(adoc( + "id", "testMultiValueAdd", + RANK_1, "1", + RANK_2, "2" + )); + assertU(commit()); + //assert that the document made it in + assertQ(req("q", "id:testMultiValueAdd"), "//*[@numFound='1']"); + h.getCore().withSearcher((searcher) -> { + LeafReader reader = searcher.getIndexReader().getContext().leaves().get(0).reader(); + // assert that the field made it in + assertNotNull(reader.getFieldInfos().fieldInfo(RankField.INTERNAL_RANK_FIELD_NAME)); + // assert that the features made it in + assertTrue(reader.terms(RankField.INTERNAL_RANK_FIELD_NAME).iterator().seekExact(new BytesRef(RANK_2.getBytes(StandardCharsets.UTF_8)))); + assertTrue(reader.terms(RankField.INTERNAL_RANK_FIELD_NAME).iterator().seekExact(new BytesRef(RANK_1.getBytes(StandardCharsets.UTF_8)))); + return null; + }); + } + + public void testSortFails() throws IOException { + assertU(adoc( + "id", "testSortFails", + RANK_1, "1" + )); + assertU(commit()); + assertQEx("Can't sort on rank field", req( + "q", "id:testSortFails", + "sort", RANK_1 + " desc"), 400); + } + + @Ignore("We currently don't fail these kinds of requests with other field types") + public void testFacetFails() throws IOException { + assertU(adoc( + "id", "testFacetFails", + RANK_1, "1" + )); + assertU(commit()); + assertQEx("Can't facet on rank field", req( + "q", "id:testFacetFails", + "facet", "true", + "facet.field", RANK_1), 400); + } + + public void testTermQuery() throws IOException { + assertU(adoc( + "id", "testTermQuery", + RANK_1, "1", + RANK_2, "1" + )); + assertU(adoc( + "id", "testTermQuery2", + RANK_1, "1" + )); + assertU(commit()); + assertQ(req("q", RANK_1 + ":*"), "//*[@numFound='2']"); + assertQ(req("q", RANK_1 + ":[* TO *]"), "//*[@numFound='2']"); + assertQ(req("q", RANK_2 + ":*"), "//*[@numFound='1']"); + assertQ(req("q", RANK_2 + ":[* TO *]"), "//*[@numFound='1']"); + + assertQEx("Term queries not supported", req("q", RANK_1 + ":1"), 400); + assertQEx("Range queries not supported", req("q", RANK_1 + ":[1 TO 10]"), 400); + } + + + public void testResponseQuery() throws IOException { + assertU(adoc( + "id", "testResponseQuery", + RANK_1, "1" + )); + assertU(commit()); + // Ignore requests to retrieve rank + assertQ(req("q", RANK_1 + ":*", + "fl", "id," + RANK_1), + "//*[@numFound='1']", + "count(//result/doc[1]/str)=1"); + } + + public void testRankQParserQuery() throws IOException { + assertU(adoc( + "id", "1", + "str_field", "foo", + RANK_1, "1", + RANK_2, "2" + )); + assertU(adoc( + "id", "2", + "str_field", "foo", + RANK_1, "2", + RANK_2, "1" + )); + assertU(commit()); + assertQ(req("q", "str_field:foo _query_:{!rank f='" + RANK_1 + "' function='log' scalingFactor='1'}"), + "//*[@numFound='2']", + "//result/doc[1]/str[@name='id'][.='2']", + "//result/doc[2]/str[@name='id'][.='1']"); + + assertQ(req("q", "str_field:foo _query_:{!rank f='" + RANK_2 + "' function='log' scalingFactor='1'}"), + "//*[@numFound='2']", + "//result/doc[1]/str[@name='id'][.='1']", + "//result/doc[2]/str[@name='id'][.='2']"); + + assertQ(req("q", "foo", + "defType", "dismax", + "qf", "str_field^10", + "bq", "{!rank f='" + RANK_1 + "' function='log' scalingFactor='1'}" + ), + "//*[@numFound='2']", + "//result/doc[1]/str[@name='id'][.='2']", + "//result/doc[2]/str[@name='id'][.='1']"); + + assertQ(req("q", "foo", + "defType", "dismax", + "qf", "str_field^10", + "bq", "{!rank f='" + RANK_2 + "' function='log' scalingFactor='1'}" + ), + "//*[@numFound='2']", + "//result/doc[1]/str[@name='id'][.='1']", + "//result/doc[2]/str[@name='id'][.='2']"); + } Review comment: Also I was curious to see if `debugQuery=true` does the right thing ( it does! ) ``` <?xml version="1.0" encoding="UTF-8"?> <response> <lst name="responseHeader"> <int name="status">0</int> <int name="QTime">161</int> <lst name="params"> <str name="q">foo</str> <str name="defType">dismax</str> <str name="qf">str_field^10</str> <str name="fl">*,score,rank_1,rank_2</str> <str name="wt">xml</str> <str name="debugQuery">true</str> <str name="bq">{!rank f='rank_2' function='log' scalingFactor='1'}</str> </lst> </lst> <result name="response" numFound="2" start="0" maxScore="1.9273467" numFoundExact="true"> <doc> <str name="id">1</str> <str name="str_field">foo</str> <float name="score">1.9273467</float> </doc> <doc> <str name="id">2</str> <str name="str_field">foo</str> <float name="score">1.5218816</float> </doc> </result> <lst name="debug"> <str name="rawquerystring">foo</str> <str name="querystring">foo</str> <str name="parsedquery">+DisjunctionMaxQuery(((str_field:foo)^10.0)) () FeatureQuery(FeatureQuery(field=_rank_, feature=rank_2, function=LogFunction(scalingFactor=1.0)))</str> <str name="parsedquery_toString">+((str_field:foo)^10.0) () FeatureQuery(field=_rank_, feature=rank_2, function=LogFunction(scalingFactor=1.0))</str> <lst name="explain"> <str>1.9273467 = sum of: 0.82873434 = weight(str_field:foo in 0) [SchemaSimilarity], result of: 0.82873434 = score(freq=1.0), computed as boost * idf * tf from: 10.0 = boost 0.18232156 = idf, computed as log(1 + (N - n + 0.5) / (n + 0.5)) from: 2 = n, number of documents containing term 2 = N, total number of documents with field 0.45454544 = tf, computed as freq / (freq + k1 * (1 - b + b * dl / avgdl)) from: 1.0 = freq, occurrences of term within document 1.2 = k1, term saturation parameter 0.75 = b, length normalization parameter 1.0 = dl, length of field 1.0 = avgdl, average length of field 1.0986123 = Log function on the _rank_ field for the rank_2 feature, computed as w * log(a + S) from: 1.0 = w, weight of this function 1.0 = a, scaling factor 2.0 = S, feature value</str> <str>1.5218816 = sum of: 0.82873434 = weight(str_field:foo in 1) [SchemaSimilarity], result of: 0.82873434 = score(freq=1.0), computed as boost * idf * tf from: 10.0 = boost 0.18232156 = idf, computed as log(1 + (N - n + 0.5) / (n + 0.5)) from: 2 = n, number of documents containing term 2 = N, total number of documents with field 0.45454544 = tf, computed as freq / (freq + k1 * (1 - b + b * dl / avgdl)) from: 1.0 = freq, occurrences of term within document 1.2 = k1, term saturation parameter 0.75 = b, length normalization parameter 1.0 = dl, length of field 1.0 = avgdl, average length of field 0.6931472 = Log function on the _rank_ field for the rank_2 feature, computed as w * log(a + S) from: 1.0 = w, weight of this function 1.0 = a, scaling factor 1.0 = S, feature value</str> </lst> <str name="QParser">DisMaxQParser</str> <null name="altquerystring" /> <arr name="boost_queries"> <str>{!rank f='rank_2' function='log' scalingFactor='1'}</str> </arr> <arr name="parsed_boost_queries"> <str>FeatureQuery(FeatureQuery(field=_rank_, feature=rank_2, function=LogFunction(scalingFactor=1.0)))</str> </arr> <null name="boostfuncs" /> <lst name="timing"> <double name="time">160.0</double> <lst name="prepare"> <double name="time">1.0</double> <lst name="query"> <double name="time">1.0</double> </lst> <lst name="facet"> <double name="time">0.0</double> </lst> <lst name="facet_module"> <double name="time">0.0</double> </lst> <lst name="mlt"> <double name="time">0.0</double> </lst> <lst name="highlight"> <double name="time">0.0</double> </lst> <lst name="stats"> <double name="time">0.0</double> </lst> <lst name="expand"> <double name="time">0.0</double> </lst> <lst name="terms"> <double name="time">0.0</double> </lst> <lst name="debug"> <double name="time">0.0</double> </lst> </lst> <lst name="process"> <double name="time">155.0</double> <lst name="query"> <double name="time">3.0</double> </lst> <lst name="facet"> <double name="time">0.0</double> </lst> <lst name="facet_module"> <double name="time">0.0</double> </lst> <lst name="mlt"> <double name="time">0.0</double> </lst> <lst name="highlight"> <double name="time">0.0</double> </lst> <lst name="stats"> <double name="time">0.0</double> </lst> <lst name="expand"> <double name="time">0.0</double> </lst> <lst name="terms"> <double name="time">0.0</double> </lst> <lst name="debug"> <double name="time">152.0</double> </lst> </lst> </lst> </lst> </response> ``` ---------------------------------------------------------------- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org --------------------------------------------------------------------- To unsubscribe, e-mail: issues-unsubscr...@lucene.apache.org For additional commands, e-mail: issues-h...@lucene.apache.org