Ankit:
I wrote some Lucene sample code using your data and query.
I also provided gfsh commands to create the index on the nested fields.
Note: I purposely provided 2 documents to show the difference between the queries.
package examples;

import java.util.HashSet;
import java.util.List;
import java.util.concurrent.TimeUnit;

import org.apache.geode.cache.Region;
import org.apache.geode.cache.client.ClientCache;
import org.apache.geode.cache.client.ClientCacheFactory;
import org.apache.geode.cache.client.ClientRegionShortcut;
import org.apache.geode.cache.lucene.LuceneIndex;
import org.apache.geode.cache.lucene.LuceneQuery;
import org.apache.geode.cache.lucene.LuceneQueryException;
import org.apache.geode.cache.lucene.LuceneService;
import org.apache.geode.cache.lucene.LuceneServiceProvider;
import org.apache.geode.cache.lucene.PageableLuceneQueryResults;
import org.apache.geode.pdx.JSONFormatter;
import org.apache.geode.pdx.PdxInstance;
public class JSONTest {

  // NOTE: Below is truncated JSON; a single JSON document can contain at most
  // an array of col1...col30 (30 different attributes) within "data".
  public final static String jsonDoc_2 = "{" +
      "\"data\":[{" +
      "\"col1\": {" +
      "\"k11\": \"aaa\"," +
      "\"k12\": true," +
      "\"k13\": 1111," +
      "\"k14\": \"2020-12-31:00:00:00\"" +
      "}," +
      "\"col2\":[{" +
      "\"k21\": \"222222\"," +
      "\"k22\": true" +
      "}]" +
      "}]" +
      "}";

  public final static String jsonDoc_3 = "{" +
      "\"data\":[{" +
      "\"col1\": {" +
      "\"k11\": \"bbb\"," +
      "\"k12\": true," +
      "\"k13\": 1111," +
      "\"k14\": \"2020-12-31:00:00:00\"" +
      "}," +
      "\"col2\":[{" +
      "\"k21\": \"333333\"," +
      "\"k22\": true" +
      "}]" +
      "}]" +
      "}";

  // NOTE: col1...col30 are a mix of JSONObject ({}) and JSONArray ([]) as
  // shown above in jsonDoc_2.

  public final static String REGION_NAME = "REGION_NAME";
  public static void main(String[] args)
      throws InterruptedException, LuceneQueryException {
    // create client-cache
    ClientCache cache = new ClientCacheFactory()
        .addPoolLocator("localhost", 10334)
        .setPdxReadSerialized(true)
        .create();
    Region<String, PdxInstance> region = cache
        .<String, PdxInstance>createClientRegionFactory(ClientRegionShortcut.CACHING_PROXY)
        .create(REGION_NAME);

    // store the json documents
    region.put("key", JSONFormatter.fromJSON(jsonDoc_2));
    region.put("key3", JSONFormatter.fromJSON(jsonDoc_3));

    // wait until the puts are indexed; the public LuceneService API is
    // sufficient here, no cast to the internal implementation classes needed
    // (getIndex can return null on a client, hence the null check)
    LuceneService service = LuceneServiceProvider.get(cache);
    LuceneIndex index = service.getIndex("jsonIndex", REGION_NAME);
    if (index != null) {
      service.waitUntilFlushed("jsonIndex", REGION_NAME, 60000, TimeUnit.MILLISECONDS);
    }

    LuceneQuery<String, PdxInstance> query = service.createLuceneQueryFactory()
        .create("jsonIndex", REGION_NAME, "222222 OR 333333", "data.col2.k21");
    System.out.println("Query 222222 OR 333333");
    HashSet<Object> results = getResults(query, REGION_NAME);

    LuceneQuery<String, PdxInstance> query2 = service.createLuceneQueryFactory()
        .create("jsonIndex", REGION_NAME, "aaa OR xxx OR yyy", "data.col1.k11");
    System.out.println("Query aaa OR xxx OR yyy");
    results = getResults(query2, REGION_NAME);
    // server side:
    // gfsh> start locator
    // gfsh> start server --name=server50505 --server-port=50505
    // gfsh> create lucene index --name=jsonIndex --region=/REGION_NAME \
    //         --field=data.col2.k21,data.col1.k11 \
    //         --serializer=org.apache.geode.cache.lucene.FlatFormatSerializer
    // gfsh> create region --name=REGION_NAME --type=PARTITION \
    //         --redundant-copies=1 --total-num-buckets=61
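    // You can also sanity-check the index and data directly from gfsh. A
    // hedged example (option names per the documented "search lucene"
    // command; adjust to your gfsh version):
    // gfsh> search lucene --name=jsonIndex --region=/REGION_NAME \
    //         --queryStrings="222222 OR 333333" --defaultField=data.col2.k21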
    // How to query the json document with OQL, like:
    // 1. select col2.k21, col1, col20 from /REGION_NAME
    //    where data.col2.k21 = '222222' OR data.col2.k21 = '333333'
    // 2. select col2.k21, col1.k11, col1 from /REGION_NAME
    //    where data.col1.k11 in ('aaa', 'xxx', 'yyy')
    // (a hedged OQL sketch using the QueryService follows after this class)
  }
  private static HashSet<Object> getResults(LuceneQuery<String, PdxInstance> query,
      String regionName) throws LuceneQueryException {
    if (query == null) {
      return null;
    }
    PageableLuceneQueryResults<String, PdxInstance> results = query.findPages();
    if (results.size() > 0) {
      System.out.println("Search found " + results.size() + " results in "
          + regionName + ", page size is " + query.getPageSize());
    }
    HashSet<Object> values = new HashSet<>();
    while (results.hasNext()) {
      results.next().stream().forEach(struct -> {
        Object value = struct.getValue();
        if (value instanceof PdxInstance) {
          PdxInstance pdx = (PdxInstance) value;
          String jsonString = JSONFormatter.toJSON(pdx);
          // the "data" field deserializes as a List of nested PdxInstances
          @SuppressWarnings("unchecked")
          List<PdxInstance> dataList = (List<PdxInstance>) pdx.getField("data");
          for (PdxInstance data : dataList) {
            Object colObject = data.getField("col1");
            System.out.println("col=" + colObject);
          }
          System.out.println("Found a json object:" + jsonString
              + ":dataList=" + dataList);
          values.add(pdx);
        } else {
          System.out.println("key=" + struct.getKey() + ",data=" + value);
          values.add(value);
        }
      });
    }
    System.out.println("Search found " + values.size() + " results in " + regionName);
    return values;
  }
}
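
For the two OQL-style queries in the comments, here is a minimal sketch using the
client QueryService. Treat it as a starting point, not a verified solution: the
class name OQLSketch is my own illustration, and the exact FROM/WHERE shapes over
the nested "data" and "col2" collections may need adjusting to your real documents.

import org.apache.geode.cache.client.ClientCache;
import org.apache.geode.cache.query.Query;
import org.apache.geode.cache.query.QueryService;
import org.apache.geode.cache.query.SelectResults;

public class OQLSketch {
  // Hedged sketch: OQL equivalents of the two queries commented in JSONTest.
  public static void runQueries(ClientCache cache) throws Exception {
    QueryService queryService = cache.getQueryService();

    // 1. Iterate the nested "data" and "col2" arrays, then filter on k21.
    Query q1 = queryService.newQuery(
        "select c.k21, d.col1 from /REGION_NAME r, r.data d, d.col2 c"
            + " where c.k21 = '222222' OR c.k21 = '333333'");
    SelectResults<?> r1 = (SelectResults<?>) q1.execute();
    System.out.println("query1 size=" + r1.size());

    // 2. IN SET(...) filter on the nested col1.k11 field.
    Query q2 = queryService.newQuery(
        "select d.col2, d.col1.k11, d.col1 from /REGION_NAME r, r.data d"
            + " where d.col1.k11 IN SET('aaa', 'xxx', 'yyy')");
    SelectResults<?> r2 = (SelectResults<?>) q2.execute();
    System.out.println("query2 size=" + r2.size());
  }
}

Because the client is created with setPdxReadSerialized(true), query results come
back as PdxInstance objects, so nested fields can be read by name the same way
getResults does above.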
Regards
Xiaojian
On 11/23/20, 9:46 AM, "Xiaojian Zhou" <[email protected]> wrote:
Ankit:
Anil can provide you some sample code for OQL queries on JSON.
I will find some lucene sample code on JSON for you.
Regards
Xiaojian
On 11/23/20, 9:27 AM, "ankit Soni" <[email protected]> wrote:
Hi
I am looking for any means of querying (OQL/Lucene/API, etc.) this stored
data, looking to achieve this functionality first and, second, in a
performant way.
I shared the OQL-like syntax to convey my use-case easily, based on some
reference found in the docs. I am OK if a Lucene query or some other way can
fetch the results.
It would be of great help if you share a sample query/code fetching this
data.
Thanks
Ankit.
On Mon, 23 Nov 2020 at 22:43, Xiaojian Zhou <[email protected]> wrote:
> Anil:
>
> The syntax is OQL. But I understand they want to query the JSON object
> based on the criteria.
>
> On 11/23/20, 9:08 AM, "Anilkumar Gingade" <[email protected]> wrote:
>
>     Gester, looking at the sample query, I believe Ankit is asking about
>     an OQL query, not Lucene...
>
> -Anil.
>
>
> On 11/23/20, 9:02 AM, "Xiaojian Zhou" <[email protected]> wrote:
>
> Ankit:
>
>         Geode provides Lucene queries on JSON fields. Your query can be
>         supported.
>
>
>         https://gemfire.docs.pivotal.io/910/geode/tools_modules/lucene_integration.html
>
>         However, the above document does not provide a query example on a
>         JSON object.
>
> I can give you some sample code to query on JSON.
>
> Regards
> Xiaojian Zhou
>
>         On 11/22/20, 11:53 AM, "ankit Soni" <[email protected]> wrote:
>
>             Hello geode-devs, please provide guidance on this.
>
> Ankit.
>
>             On Sat, 21 Nov 2020 at 10:23, ankit Soni <[email protected]> wrote:
>
> > Hello team,
> >
> > I am *evaluating usage of Geode (1.12) for storing JSON documents and
> > querying the same*. I am able to store the JSON records successfully in
> > Geode but am seeking guidance on how to query them.
> > More details on the code and sample JSON follow:
> >
> >
> > *Sample client-code*
> >
> > import org.apache.geode.cache.client.ClientCache;
> > import org.apache.geode.cache.client.ClientCacheFactory;
> > import org.apache.geode.cache.client.ClientRegionShortcut;
> > import org.apache.geode.pdx.JSONFormatter;
> > import org.apache.geode.pdx.PdxInstance;
> >
> > public class MyTest {
> >
> >     *//NOTE: Below is truncated json, a single json document can contain at most an array of col1...col30 (30 diff attributes) within data.*
> >     public final static String jsonDoc_2 = "{" +
> >             "\"data\":[{" +
> >                 "\"col1\": {" +
> >                     "\"k11\": \"aaa\"," +
> >                     "\"k12\":true," +
> >                     "\"k13\": 1111," +
> >                     "\"k14\": \"2020-12-31:00:00:00\"" +
> >                 "}," +
> >                 "\"col2\":[{" +
> >                     "\"k21\": \"222222\"," +
> >                     "\"k22\": true" +
> >                 "}]" +
> >             "}]" +
> >         "}";
> >
> >     *//NOTE: col1...col30 are a mix of JSONObject ({}) and JSONArray ([]) as shown above in jsonDoc_2;*
> >
> >     public static void main(String[] args){
> >
> >         //create client-cache
> >         ClientCache cache = new ClientCacheFactory().addPoolLocator(LOCATOR_HOST, PORT).create();
> >         Region<String, PdxInstance> region = cache.<String, PdxInstance>createClientRegionFactory(ClientRegionShortcut.CACHING_PROXY)
> >                 .create(REGION_NAME);
> >
> >         //store json document
> >         region.put("key", JSONFormatter.fromJSON(jsonDoc_2));
> >
> >         //How to query json document like,
> >
> >         // 1. select col2.k21, col1, col20 from /REGION_NAME where data.col2.k21 = '222222' OR data.col2.k21 = '333333'
> >
> >         // 2. select col2.k21, col1.k11, col1 from /REGION_NAME where data.col1.k11 in ('aaa', 'xxx', 'yyy')
> >     }
> > }
> >
> > *Server: Region-creation*
> >
> > gfsh> create region --name=REGION_NAME --type=PARTITION --redundant-copies=1 --total-num-buckets=61
> >
> >
> > *Setup: Distributed cluster of 3 nodes*
> >
> > *My Observations/Problems*
> > - Put operation takes excessive time: region.put("key",
> > JSONFormatter.fromJSON(jsonDoc_2)); - fetching a single record from a
> > file and storing it in Geode takes approx. 3 secs.
> > Are there any suggestions/configurations related to the JSONFormatter
> > API or otherwise to optimize this...?
> >
> > *Looking forward to guidance on querying this JSON for the above sample
> > queries.*
> >
> > *Thanks*
> > *Ankit*
> >