Hi Upayavira,
Here are a couple examples with debugQuery set.
I've misled Mikhail, as the query times are getting longer as the list of ids
gets bigger.
Can you see a reason why, when indexB has only 6 ids in its list, it still
takes 46 seconds?
Ids=6
{
"responseHeader": {
"status": 0,
"QTime": 46849,
"params": {
"debugQuery": "true",
"indent": "true",
"start": "0",
"q": "{!join from=sedolKey to=sedolKey fromIndex=indexB}universe:55AL86",
"_": "1441810031117",
"wt": "json"
}
},
"response": {
"numFound": 0,
"start": 0,
"docs": []
},
"debug": {
"join": {
"{!join from=sedolKey to=sedolKey fromIndex=indexB}universe:55AL86": {
"time": 46848,
"fromSetSize": 6,
"toSetSize": 0,
"fromTermCount": 11837043,
"fromTermTotalDf": 11837043,
"fromTermDirectCount": 11837043,
"fromTermHits": 6,
"fromTermHitsTotalDf": 6,
"toTermHits": 0,
"toTermHitsTotalDf": 0,
"toTermDirectCount": 0,
"smallSetsDeferred": 0,
"toSetDocsAdded": 0
}
},
"rawquerystring": "{!join from=longValue to=longValue
fromIndex=indexB}universe:55AL86",
"querystring": "{!join from=longValue to=longValue
fromIndex=indexB}universe:55AL86",
"parsedquery": "JoinQuery({!join from=longValue to=longValue
fromIndex=indexB}universe:55AL86)",
"parsedquery_toString": "{!join from=longValue to=longValue
fromIndex=indexB}universe:55AL86",
"explain": {},
"QParser": "",
"timing": {
"time": 46849,
"prepare": {
"time": 0,
"query": {
"time": 0
},
"facet": {
"time": 0
},
"mlt": {
"time": 0
},
"highlight": {
"time": 0
},
"stats": {
"time": 0
},
"expand": {
"time": 0
},
"debug": {
"time": 0
}
},
"process": {
"time": 46848,
"query": {
"time": 46848
},
"facet": {
"time": 0
},
"mlt": {
"time": 0
},
"highlight": {
"time": 0
},
"stats": {
"time": 0
},
"expand": {
"time": 0
},
"debug": {
"time": 0
}
}
}
}
}
###########################################
Ids=298
{
"responseHeader": {
"status": 0,
"QTime": 51570,
"params": {
"debugQuery": "true",
"indent": "true",
"q": "{!join from=longValue to=longValue
fromIndex=indexB}universe:16XO52",
"_": "1441810442921",
"wt": "json"
}
},
"response": {
"numFound": 0,
"start": 0,
"docs": []
},
"debug": {
"join": {
"{!join from=longValue to=longValue fromIndex=indexB}universe:16XO52": {
"time": 51570,
"fromSetSize": 298,
"toSetSize": 0,
"fromTermCount": 11837043,
"fromTermTotalDf": 11837043,
"fromTermDirectCount": 11837043,
"fromTermHits": 298,
"fromTermHitsTotalDf": 298,
"toTermHits": 0,
"toTermHitsTotalDf": 0,
"toTermDirectCount": 0,
"smallSetsDeferred": 0,
"toSetDocsAdded": 0
}
},
"rawquerystring": "{!join from=longValue to=longValue
fromIndex=indexB}universe:16XO52",
"querystring": "{!join from=longValue to=longValue
fromIndex=indexB}universe:16XO52",
"parsedquery": "JoinQuery({!join from=longValue to=longValue
fromIndex=indexB}universe:16XO52)",
"parsedquery_toString": "{!join from=longValue to=longValue
fromIndex=indexB}universe:16XO52",
"explain": {},
"QParser": "",
"timing": {
"time": 51570,
"prepare": {
"time": 0,
"query": {
"time": 0
},
"facet": {
"time": 0
},
"mlt": {
"time": 0
},
"highlight": {
"time": 0
},
"stats": {
"time": 0
},
"expand": {
"time": 0
},
"debug": {
"time": 0
}
},
"process": {
"time": 51570,
"query": {
"time": 51570
},
"facet": {
"time": 0
},
"mlt": {
"time": 0
},
"highlight": {
"time": 0
},
"stats": {
"time": 0
},
"expand": {
"time": 0
},
"debug": {
"time": 0
}
}
}
}
}
###################################################################
Ids=424088
{
"debug": {
"join": {
"{!join from=longValue to=longValue fromIndex=indexB}universe:LARGE": {
"time": 44386,
"fromSetSize": 424088,
"toSetSize": 892314,
"fromTermCount": 11837043,
"fromTermTotalDf": 11837043,
"fromTermDirectCount": 11837043,
"fromTermHits": 420365,
"fromTermHitsTotalDf": 420365,
"toTermHits": 57074,
"toTermHitsTotalDf": 944597,
"toTermDirectCount": 55722,
"smallSetsDeferred": 1352,
"toSetDocsAdded": 892314
}
},
"rawquerystring": "{!join from=longValue to=longValue
fromIndex=indexB}universe:LARGE",
"querystring": "{!join from=longValue to=longValue
fromIndex=indexB}universe:LARGE",
"parsedquery": "JoinQuery({!join from=longValue to=longValue
fromIndex=indexB}universe:LARGE)",
"parsedquery_toString": "{!join from=longValue to=longValue
fromIndex=indexB}universe:LARGE",
"explain": {
"2000000076769983": "\n1.0 = (MATCH)
org.apache.solr.search.JoinQuery$JoinQueryWeight@483489aa , product of:\n 1.0
= boost\n 1.0 = queryNorm\n",
"2000000076769984": "\n1.0 = (MATCH)
org.apache.solr.search.JoinQuery$JoinQueryWeight@118f9aef , product of:\n 1.0
= boost\n 1.0 = queryNorm\n",
"2000000076769985": "\n1.0 = (MATCH)
org.apache.solr.search.JoinQuery$JoinQueryWeight@7a5d18ac , product of:\n 1.0
= boost\n 1.0 = queryNorm\n",
"2000000076769986": "\n1.0 = (MATCH)
org.apache.solr.search.JoinQuery$JoinQueryWeight@6d601adc , product of:\n 1.0
= boost\n 1.0 = queryNorm\n",
"2000000076769987": "\n1.0 = (MATCH)
org.apache.solr.search.JoinQuery$JoinQueryWeight@2e262f31 , product of:\n 1.0
= boost\n 1.0 = queryNorm\n",
"2000000076769988": "\n1.0 = (MATCH)
org.apache.solr.search.JoinQuery$JoinQueryWeight@4b200302 , product of:\n 1.0
= boost\n 1.0 = queryNorm\n",
"2000000076769989": "\n1.0 = (MATCH)
org.apache.solr.search.JoinQuery$JoinQueryWeight@569910d2 , product of:\n 1.0
= boost\n 1.0 = queryNorm\n",
"2000000076770006": "\n1.0 = (MATCH)
org.apache.solr.search.JoinQuery$JoinQueryWeight@635a3843 , product of:\n 1.0
= boost\n 1.0 = queryNorm\n",
"2000000076770007": "\n1.0 = (MATCH)
org.apache.solr.search.JoinQuery$JoinQueryWeight@5b438a92 , product of:\n 1.0
= boost\n 1.0 = queryNorm\n",
"2000000076770029": "\n1.0 = (MATCH)
org.apache.solr.search.JoinQuery$JoinQueryWeight@31d9c4c , product of:\n 1.0 =
boost\n 1.0 = queryNorm\n"
},
"QParser": "",
"timing": {
"time": 480859,
"prepare": {
"time": 0,
"query": {
"time": 0
},
"facet": {
"time": 0
},
"mlt": {
"time": 0
},
"highlight": {
"time": 0
},
"stats": {
"time": 0
},
"expand": {
"time": 0
},
"debug": {
"time": 0
}
},
"process": {
"time": 480859,
"query": {
"time": 43737
},
"facet": {
"time": 0
},
"mlt": {
"time": 0
},
"highlight": {
"time": 0
},
"stats": {
"time": 0
},
"expand": {
"time": 0
},
"debug": {
"time": 437122
}
}
}
}
}
Thanks
Russ.
-----Original Message-----
From: Upayavira [mailto:[email protected]]
Sent: 09 September 2015 13:02
To: [email protected]
Subject: Re: Solr Join between two indexes taking too long.
To explain what a join does:
It goes over to the joined index, and executes a query. This results in a list
of "ids" that will be used to do a search on the main index. The more of these
ids there are, the worse performance will be. Thus, if you have 100k documents
that match in the join core, you will be doing a 100k term search, which will
invariably be painful, because the more terms you include in the search, the
slower it will be.
How many matching docs do you have on the other side of your query?
Upayavira
On Tue, Sep 8, 2015, at 02:09 PM, Russell Taylor wrote:
> Hi,
> I hope somebody can help.
>
> We have two indexes, one which holds the descriptive data and the
> other one which holds lists of docs which are of a certain type
> (called universes in our world). They need to be joined together to
> show a list of data from indexA where a filtered indexB (by
> universe:value) has matching longs (The join field).
>
> At the moment the query is taking 55 seconds; we need to get it under a
> second. Any help is most appreciated.
>
> INDEXES:
>
> Index a (primary index)
> 31 million docs with a converted alphanumeric to a long value with a
> possible 10 million unique values.
>
> Index B (the joined index)
> 250 million documents with a converted alphanumeric to a long value
> with a possible 10 million unique values.
> IndexB is filtered by universe which could be between 1 and 500,000 docs.
>
> QUERY:
> http://127.0.0.1:8080/solr/indexA/select?q={!join+from=longValue+to=longValue+fromIndex=IndexB}universe:universeValue
>
> Qtime is 55 seconds for either a universe of 5 docs or 500,000 docs.
>
>
>
> Thanks
>
>
> Russ.
>
>
> *******************************************************
> This message (including any files transmitted with it) may contain
> confidential and/or proprietary information, is the property of
> Interactive Data Corporation and/or its subsidiaries, and is directed
> only to the addressee(s). If you are not the designated recipient or
> have reason to believe you received this message in error, please
> delete this message from your system and notify the sender
> immediately. An unintended recipient's disclosure, copying,
> distribution, or use of this message or any attachments is prohibited and may
> be unlawful.
> *******************************************************
*******************************************************
This message (including any files transmitted with it) may contain confidential
and/or proprietary information, is the property of Interactive Data Corporation
and/or its subsidiaries, and is directed only to the addressee(s). If you are
not the designated recipient or have reason to believe you received this
message in error, please delete this message from your system and notify the
sender immediately. An unintended recipient's disclosure, copying,
distribution, or use of this message or any attachments is prohibited and may
be unlawful.
*******************************************************