Hi Solr Community,

I've been experimenting with Solr 4.0 (trunk) in order to test the SOLR-792
feature. I have written a test (included below) that shows what I'm trying to
ask. Basically, I'm creating an area/city/neighbourhood hierarchy. The problem
that I see is that a value that occurs in only 1 document at a particular level
of the hierarchy (e.g. Greater London/Greenwich/Centre, which I've called
"Value_that_cant_be_matched" in this example) is not found by the pivot facet.
If I add a second document with the same value, then it works. I'm puzzled why
this is the case.

This is the output of the System.out calls that print the pivot facet field
hierarchy (see line 86)

PIVOT: level1_loc_s,level2_loc_s,level3_loc_s
level1_loc_s=Greater London (8)
  level2_loc_s=London (5)
    level3_loc_s=Mayfair (3)
    level3_loc_s=Hammersmith (2)
  level2_loc_s=Greenwich (3)
    level3_loc_s=Greenwich Centre (2)
    //--> why isn't there a "level3_loc_s=Value_that_cant_be_matched (1)" here?
level1_loc_s=Groot Amsterdam (5)
  level2_loc_s=Amsterdam (3)
    level3_loc_s=Jordaan (2)
  level2_loc_s=Amstelveen (2)
    level3_loc_s=Centrum (2)


How can I make sure that Solr includes this single document in the tree
when I facet on this "location" hierarchy?

Thank you very much for your help.

Nicolas

import java.io.IOException;
import java.net.MalformedURLException;
import java.util.ArrayList;
import java.util.List;
import java.util.Map;

import org.apache.solr.client.solrj.SolrQuery;
import org.apache.solr.client.solrj.SolrServerException;
import org.apache.solr.client.solrj.impl.CommonsHttpSolrServer;
import org.apache.solr.client.solrj.response.PivotField;
import org.apache.solr.client.solrj.response.QueryResponse;
import org.apache.solr.common.SolrInputDocument;
import org.apache.solr.common.util.NamedList;
import org.junit.Assert;
import org.junit.Before;
import org.junit.Test;

/**
 * This is a test for hiearchical faceting based on SOLR-792 (I basically
just checkout the trunk of Solr-4.0).
 *
 * Unit test that shows the particular behaviour that I'm experiencing.
 * I would have expected that the doc (see line 95) with as level3_loc_s
"Value_that_cant_be_matched" would appear in the pivot. It seems that you
actually need at least 2!
 *
 * @author npeeters
 */
public class HierarchicalPivotTest {

    CommonsHttpSolrServer server;

    @Before
    public void setup() throws MalformedURLException {
        // the instance can be reused
        this.server = new CommonsHttpSolrServer("http://localhost:8983/solr
");
        this.server.setSoTimeout(500); // socket read timeout
        this.server.setConnectionTimeout(100);
        this.server.setDefaultMaxConnectionsPerHost(100);
        this.server.setMaxTotalConnections(100);
        this.server.setFollowRedirects(false); // defaults to false
        // allowCompression defaults to false.
    }

    protected List<SolrInputDocument> createHierarchicalOrgData() {
        int id = 1;
        List<SolrInputDocument> docs = new ArrayList<SolrInputDocument>();
        docs.add(makeTestDoc("id", id++, "name", "Organization " + id,
"level1_loc_s", "Groot Amsterdam", "level2_loc_s", "Amsterdam",
"level3_loc_s", "Centrum"));
        docs.add(makeTestDoc("id", id++, "name", "Organization " + id,
"level1_loc_s", "Groot Amsterdam", "level2_loc_s", "Amsterdam",
"level3_loc_s", "Jordaan"));
        docs.add(makeTestDoc("id", id++, "name", "Organization " + id,
"level1_loc_s", "Groot Amsterdam", "level2_loc_s", "Amsterdam",
"level3_loc_s", "Jordaan"));
        docs.add(makeTestDoc("id", id++, "name", "Organization " + id,
"level1_loc_s", "Groot Amsterdam", "level2_loc_s", "Amstelveen",
"level3_loc_s", "Centrum"));
        docs.add(makeTestDoc("id", id++, "name", "Organization " + id,
"level1_loc_s", "Groot Amsterdam", "level2_loc_s", "Amstelveen",
"level3_loc_s", "Centrum"));
        docs.add(makeTestDoc("id", id++, "name", "Organization " + id,
"level1_loc_s", "Greater London", "level2_loc_s", "London", "level3_loc_s",
"Hammersmith"));
        docs.add(makeTestDoc("id", id++, "name", "Organization " + id,
"level1_loc_s", "Greater London", "level2_loc_s", "London", "level3_loc_s",
"Hammersmith"));
        docs.add(makeTestDoc("id", id++, "name", "Organization " + id,
"level1_loc_s", "Greater London", "level2_loc_s", "London", "level3_loc_s",
"Mayfair"));
        docs.add(makeTestDoc("id", id++, "name", "Organization " + id,
"level1_loc_s", "Greater London", "level2_loc_s", "London", "level3_loc_s",
"Mayfair"));
        docs.add(makeTestDoc("id", id++, "name", "Organization " + id,
"level1_loc_s", "Greater London", "level2_loc_s", "London", "level3_loc_s",
"Mayfair"));
        docs.add(makeTestDoc("id", id++, "name", "Organization " + id,
"level1_loc_s", "Greater London", "level2_loc_s", "Greenwich",
"level3_loc_s", "Value_that_cant_be_matched"));
        docs.add(makeTestDoc("id", id++, "name", "Organization " + id,
"level1_loc_s", "Greater London", "level2_loc_s", "Greenwich",
"level3_loc_s", "Greenwich Centre"));
        docs.add(makeTestDoc("id", id++, "name", "Organization " + id,
"level1_loc_s", "Greater London", "level2_loc_s", "Greenwich",
"level3_loc_s", "Greenwich Centre"));
        return docs;
    }

    @Test
    public void hierQueryWithOrgData() throws SolrServerException,
IOException {

        server.deleteByQuery("*:*");// delete everything!
        server.commit();
        assertNumFound("*:*", 0); // make sure it got in

        List<SolrInputDocument> docs = createHierarchicalOrgData();
        server.add(docs);
        server.commit();

        SolrQuery query = new SolrQuery("*:*");
        query.addFacetPivotField("level1_loc_s,level2_loc_s,level3_loc_s");
        query.setFacetMinCount(0);
        query.setRows(20);

        QueryResponse rsp = server.query(query);
        Assert.assertEquals(docs.size(), rsp.getResults().getNumFound());

        NamedList<List<PivotField>> pivots = rsp.getFacetPivot();
        Assert.assertEquals("Only one pivot is defined: '" + "level1_loc_s"
+ "," + "level2_loc_s" + "," + "level3_loc_s" + "'", 1, pivots.size());

        // debug the output
        for (Map.Entry<String, List<PivotField>> entry : pivots) {
            System.out.println("PIVOT: " + entry.getKey());
            for (PivotField p : entry.getValue()) {
                p.write(System.out, 0);
            }
            System.out.println();
            //here you can see already that there's not level3_loc_s for
Greenwich, which is not what I'd expect!
        }

        List<PivotField> pivot = pivots.getVal(0);
        Assert.assertEquals("level1_loc_s" + "," + "level2_loc_s" + "," +
"level3_loc_s", pivots.getName(0));
        Assert.assertEquals("The first level, there should be 2 pivots (one
for 'Greater London' and one for 'Groot Amsterdam'", 2,
                pivot.size());

        // level 1
        PivotField greaterLondon = pivot.get(0);
        Assert.assertEquals("level1_loc_s", greaterLondon.getField());
        Assert.assertEquals("Greater London", greaterLondon.getValue());
        Assert.assertEquals("8 locations under Great London", 8,
greaterLondon.getCount());

        // level 2
        List<PivotField> greaterLondonLocations = greaterLondon.getPivot();
        Assert.assertEquals("The next level in the Greater London hierarchy
has 2 elements: 'Greenwich' and 'London'", 2,
                greaterLondonLocations.size());
        Assert.assertEquals("level2_loc_s",
greaterLondonLocations.get(0).getField());
        Assert.assertEquals("London",
greaterLondonLocations.get(0).getValue());
        Assert.assertEquals("5 neighbourhoods under " +
greaterLondonLocations.get(0).getValue(), 5, greaterLondonLocations.get(0)
                .getCount());

        Assert.assertEquals("level2_loc_s",
greaterLondonLocations.get(0).getField());
        Assert.assertEquals("Greenwich",
greaterLondonLocations.get(1).getValue());
        Assert.assertEquals("3 neighbourhoods under " +
greaterLondonLocations.get(1).getValue(), 3, greaterLondonLocations.get(1)
                .getCount());

        // level 3 (London)
        List<PivotField> londonNeighbourhoods =
greaterLondonLocations.get(0).getPivot();
        List<PivotField> greenwichNeighbourhoods =
greaterLondonLocations.get(1).getPivot();

        Assert.assertEquals("The next level in the Greater London hierarchy
has 2 elements: 'Hammersmith' and 'Mayfair'", 2,
                greaterLondonLocations.size());
        Assert.assertEquals("level3_loc_s",
londonNeighbourhoods.get(1).getField());
        Assert.assertEquals("Mayfair",
londonNeighbourhoods.get(0).getValue());
        Assert.assertEquals("3 orgs in " +
londonNeighbourhoods.get(0).getValue(), 3,
londonNeighbourhoods.get(0).getCount());
        Assert.assertEquals("Hammersmith",
londonNeighbourhoods.get(1).getValue());
        Assert.assertEquals("2 orgs in " +
londonNeighbourhoods.get(1).getValue(), 2,
londonNeighbourhoods.get(1).getCount());

        Assert.assertEquals("Greenwich Centre",
greenwichNeighbourhoods.get(0).getValue());
        Assert.assertEquals("2 orgs in " +
greenwichNeighbourhoods.get(0).getValue(), 2,
greenwichNeighbourhoods.get(0).getCount());

        // NOT WORKING! - why?
        Assert.assertEquals("2 neighbourhoods in Greenwich: WHY IS THAT?",
2, greenwichNeighbourhoods.size());
        Assert.assertEquals("Value_that_cant_be_matched",
greenwichNeighbourhoods.get(1).getValue());
        Assert.assertEquals("Value_that_cant_be_matched" +
greenwichNeighbourhoods.get(1).getValue(), 1, greenwichNeighbourhoods.get(1)
                .getCount());
    }

    public static SolrInputDocument makeTestDoc(Object... kvp) {
        SolrInputDocument doc = new SolrInputDocument();
        for (int i = 0; i < kvp.length;) {
            String k = (String) kvp[i++];
            Object v = kvp[i++];
            doc.addField(k, v);
        }
        return doc;
    }

    protected void assertNumFound(String query, int num) throws
SolrServerException, IOException {
        QueryResponse rsp = server.query(new SolrQuery(query));
        if (num != rsp.getResults().getNumFound()) {
            Assert.fail("expected: " + num + " but had: " +
rsp.getResults().getNumFound() + " :: " + rsp.getResults());
        }
    }

}

Reply via email to