Ankit: I wrote some Lucene sample code using your data and query.
I also provided the gfsh commands to create the index on the nested fields. Note: I purposely provided two documents to show the difference between the queries.

package examples;

import java.util.HashSet;
import java.util.List;
import java.util.concurrent.TimeUnit;

import org.apache.geode.cache.Region;
import org.apache.geode.cache.client.ClientCache;
import org.apache.geode.cache.client.ClientCacheFactory;
import org.apache.geode.cache.client.ClientRegionShortcut;
import org.apache.geode.cache.lucene.LuceneQuery;
import org.apache.geode.cache.lucene.LuceneQueryException;
import org.apache.geode.cache.lucene.LuceneServiceProvider;
import org.apache.geode.cache.lucene.PageableLuceneQueryResults;
import org.apache.geode.cache.lucene.internal.LuceneIndexImpl;
import org.apache.geode.cache.lucene.internal.LuceneServiceImpl;
import org.apache.geode.pdx.JSONFormatter;
import org.apache.geode.pdx.PdxInstance;

public class JSONTest {

  // NOTE: Below is truncated json; a single json document can contain at most an array of
  // col1...col30 (30 different attributes) within "data".
  public final static String jsonDoc_2 = "{" +
      "\"data\":[{" +
        "\"col1\": {" +
          "\"k11\": \"aaa\"," +
          "\"k12\": true," +
          "\"k13\": 1111," +
          "\"k14\": \"2020-12-31:00:00:00\"" +
        "}," +
        "\"col2\":[{" +
          "\"k21\": \"222222\"," +
          "\"k22\": true" +
        "}]" +
      "}]" +
      "}";

  public final static String jsonDoc_3 = "{" +
      "\"data\":[{" +
        "\"col1\": {" +
          "\"k11\": \"bbb\"," +
          "\"k12\": true," +
          "\"k13\": 1111," +
          "\"k14\": \"2020-12-31:00:00:00\"" +
        "}," +
        "\"col2\":[{" +
          "\"k21\": \"333333\"," +
          "\"k22\": true" +
        "}]" +
      "}]" +
      "}";

  // NOTE: col1...col30 are a mix of JSONObject ({}) and JSONArray ([]) as shown above in jsonDoc_2.

  public final static String REGION_NAME = "REGION_NAME";

  public static void main(String[] args) throws InterruptedException, LuceneQueryException {
    // create the client cache; setPdxReadSerialized(true) keeps values as PdxInstance on the client
    ClientCache cache = new ClientCacheFactory()
        .addPoolLocator("localhost", 10334)
        .setPdxReadSerialized(true)
        .create();
    Region<String, PdxInstance> region = cache
        .<String, PdxInstance>createClientRegionFactory(ClientRegionShortcut.CACHING_PROXY)
        .create(REGION_NAME);

    // store the json documents
    region.put("key", JSONFormatter.fromJSON(jsonDoc_2));
    region.put("key3", JSONFormatter.fromJSON(jsonDoc_3));

    LuceneServiceImpl service = (LuceneServiceImpl) LuceneServiceProvider.get(cache);
    LuceneIndexImpl index = (LuceneIndexImpl) service.getIndex("jsonIndex", REGION_NAME);
    if (index != null) {
      // make sure the puts have been indexed before querying
      service.waitUntilFlushed("jsonIndex", REGION_NAME, 60000, TimeUnit.MILLISECONDS);
    }

    LuceneQuery query = service.createLuceneQueryFactory()
        .create("jsonIndex", REGION_NAME, "222222 OR 333333", "data.col2.k21");
    System.out.println("Query 222222 OR 333333");
    HashSet results = getResults(query, REGION_NAME);

    LuceneQuery query2 = service.createLuceneQueryFactory()
        .create("jsonIndex", REGION_NAME, "aaa OR xxx OR yyy", "data.col1.k11");
    System.out.println("Query aaa OR xxx OR yyy");
    results = getResults(query2, REGION_NAME);
  }

  private static HashSet getResults(LuceneQuery query, String regionName) throws LuceneQueryException {
    if (query == null) {
      return null;
    }
    PageableLuceneQueryResults<Object, Object> results = query.findPages();
    if (results.size() > 0) {
      System.out.println("Search found " + results.size() + " results in " + regionName
          + ", page size is " + query.getPageSize());
    }
    HashSet values = new HashSet<>();
    while (results.hasNext()) {
      results.next().stream().forEach(struct -> {
        Object value = struct.getValue();
        if (value instanceof PdxInstance) {
          // json documents come back as PdxInstance; "data" is a list of nested PdxInstances
          PdxInstance pdx = (PdxInstance) value;
          String jsonString = JSONFormatter.toJSON(pdx);
          List<PdxInstance> dataList = (List<PdxInstance>) pdx.getField("data");
          for (PdxInstance data : dataList) {
            Object colObject = data.getField("col1");
            System.out.println("col=" + colObject);
          }
          System.out.println("Found a json object:" + jsonString + ":dataList=" + dataList);
          values.add(pdx);
        } else {
          System.out.println("key=" + struct.getKey() + ",data=" + value);
          values.add(value);
        }
      });
    }
    System.out.println("Search found " + values.size() + " results in " + regionName);
    return values;
  }
}

Server side (note that the Lucene index must be created before the region):

gfsh> start locator
gfsh> start server --name=server50505 --server-port=50505
gfsh> create lucene index --name=jsonIndex --region=/REGION_NAME --field=data.col2.k21,data.col1.k11 --serializer=org.apache.geode.cache.lucene.FlatFormatSerializer
gfsh> create region --name=REGION_NAME --type=PARTITION --redundant-copies=1 --total-num-buckets=61

The two Lucene queries above correspond to the OQL-style queries you asked about:

1. select col2.k21, col1, col20 from /REGION_NAME where data.col2.k21 = '222222' OR data.col2.k21 = '333333'
2. select col2.k21, col1.k11, col1 from /REGION_NAME where data.col1.k11 in ('aaa', 'xxx', 'yyy')
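If you want to run the lookups literally as OQL instead, something like the following may work through the client QueryService. Treat it as a minimal, untested sketch: it assumes the "data" and "col2" fields of the stored PdxInstance can be iterated as collections in the FROM clause, which you should verify against your real documents.

import org.apache.geode.cache.client.ClientCache;
import org.apache.geode.cache.query.SelectResults;

public class OQLSketch {
  // Untested sketch: OQL version of query 1, iterating the nested "data" and
  // "col2" collections in the FROM clause. Assumes the same client cache as
  // JSONTest above (setPdxReadSerialized(true)) and the same region.
  static void queryByK21(ClientCache cache) throws Exception {
    SelectResults<?> rows = (SelectResults<?>) cache.getQueryService()
        .newQuery("SELECT c.k21, d.col1 FROM /REGION_NAME r, r.data d, d.col2 c"
            + " WHERE c.k21 = '222222' OR c.k21 = '333333'")
        .execute();
    System.out.println("OQL found " + rows.size() + " rows");
  }
}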
Regards
Xiaojian

On 11/23/20, 9:46 AM, "Xiaojian Zhou" <zho...@vmware.com> wrote:

Ankit:

Anil can provide you some sample code for OQL queries on JSON. I will find some Lucene sample code on JSON for you.

Regards
Xiaojian

On 11/23/20, 9:27 AM, "ankit Soni" <ankit.soni.ge...@gmail.com> wrote:

Hi,

I am looking for any means of querying (OQL/Lucene/API, etc.) this stored data: first to achieve the functionality, and second, to do it in a performant way. I shared the OQL-like syntax to describe my use-case easily, based on some references found in the docs. I am OK with a Lucene query or any other way that can fetch the results. It would be of great help if you could share sample query/code fetching this data.

Thanks
Ankit.

On Mon, 23 Nov 2020 at 22:43, Xiaojian Zhou <zho...@vmware.com> wrote:

> Anil:
>
> The syntax is OQL, but I understand they want to query JSON objects based on the criteria.
>
> On 11/23/20, 9:08 AM, "Anilkumar Gingade" <aging...@vmware.com> wrote:
>
> Gester, looking at the sample query, I believe Ankit is asking about an OQL query, not Lucene...
>
> -Anil.
>
> On 11/23/20, 9:02 AM, "Xiaojian Zhou" <zho...@vmware.com> wrote:
>
> Ankit:
>
> Geode provides Lucene queries on JSON fields. Your query can be supported:
>
> https://gemfire.docs.pivotal.io/910/geode/tools_modules/lucene_integration.html
>
> However, the above document does not provide a query example on a JSON object. I can give you some sample code to query on JSON.
>
> Regards
> Xiaojian Zhou
>
> On 11/22/20, 11:53 AM, "ankit Soni" <ankit.soni.ge...@gmail.com> wrote:
>
> Hello geode-devs, please provide guidance on this.
>
> Ankit.
>
> On Sat, 21 Nov 2020 at 10:23, ankit Soni <ankit.soni.ge...@gmail.com> wrote:
>
> > Hello team,
> >
> > I am *evaluating usage of Geode (1.12) with storing JSON documents and querying the same*.
> > I am able to store the json records successfully in Geode, but I am seeking guidance on how to query them.
> > More details on the code and a sample json:
> >
> > *Sample client-code*
> >
> > import org.apache.geode.cache.Region;
> > import org.apache.geode.cache.client.ClientCache;
> > import org.apache.geode.cache.client.ClientCacheFactory;
> > import org.apache.geode.cache.client.ClientRegionShortcut;
> > import org.apache.geode.pdx.JSONFormatter;
> > import org.apache.geode.pdx.PdxInstance;
> >
> > public class MyTest {
> >
> >     // NOTE: Below is truncated json; a single json document can contain at most an array
> >     // of col1...col30 (30 diff attributes) within data.
> >     public final static String jsonDoc_2 = "{" +
> >         "\"data\":[{" +
> >           "\"col1\": {" +
> >             "\"k11\": \"aaa\"," +
> >             "\"k12\": true," +
> >             "\"k13\": 1111," +
> >             "\"k14\": \"2020-12-31:00:00:00\"" +
> >           "}," +
> >           "\"col2\":[{" +
> >             "\"k21\": \"222222\"," +
> >             "\"k22\": true" +
> >           "}]" +
> >         "}]" +
> >         "}";
> >
> >     // NOTE: col1...col30 are a mix of JSONObject ({}) and JSONArray ([]) as shown above in jsonDoc_2.
> >
> >     public static void main(String[] args) {
> >
> >         // create client-cache
> >         ClientCache cache = new ClientCacheFactory().addPoolLocator(LOCATOR_HOST, PORT).create();
> >         Region<String, PdxInstance> region = cache
> >             .<String, PdxInstance>createClientRegionFactory(ClientRegionShortcut.CACHING_PROXY)
> >             .create(REGION_NAME);
> >
> >         // store json document
> >         region.put("key", JSONFormatter.fromJSON(jsonDoc_2));
> >
> >         // How to query the json document, e.g.
> >         // 1. select col2.k21, col1, col20 from /REGION_NAME where data.col2.k21 = '222222' OR data.col2.k21 = '333333'
> >         // 2. select col2.k21, col1.k11, col1 from /REGION_NAME where data.col1.k11 in ('aaa', 'xxx', 'yyy')
> >     }
> > }
> >
> > *Server: Region-creation*
> >
> > gfsh> create region --name=REGION_NAME --type=PARTITION --redundant-copies=1 --total-num-buckets=61
> >
> > *Setup*: Distributed cluster of 3 nodes
> >
> > *My Observations/Problems*
> > - The put operation takes excessive time: region.put("key", JSONFormatter.fromJSON(jsonDoc_2));
> >   fetching a single record from a file and storing it in Geode takes approx. 3 secs.
> >   Are there any suggestions/configurations related to the JSONFormatter API or otherwise to optimize this?
> >
> > *Looking forward to guidance on querying this JSON for the above sample queries.*
> >
> > *Thanks*
> > *Ankit*
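Regarding the slow put reported above: a crude timing harness like the one below can show whether the ~3 seconds go into JSONFormatter.fromJSON parsing or into the region.put itself. This is an untested sketch; the locator host/port are placeholders, and it loops a few times because the first iteration also pays one-time costs (PDX type registration, pool connection setup).

package examples;

import org.apache.geode.cache.Region;
import org.apache.geode.cache.client.ClientCache;
import org.apache.geode.cache.client.ClientCacheFactory;
import org.apache.geode.cache.client.ClientRegionShortcut;
import org.apache.geode.pdx.JSONFormatter;
import org.apache.geode.pdx.PdxInstance;

public class PutTimingSketch {
  public static void main(String[] args) {
    ClientCache cache = new ClientCacheFactory()
        .addPoolLocator("localhost", 10334) // placeholder: your locator host/port
        .setPdxReadSerialized(true)
        .create();
    Region<String, PdxInstance> region = cache
        .<String, PdxInstance>createClientRegionFactory(ClientRegionShortcut.CACHING_PROXY)
        .create("REGION_NAME");

    // Time parsing and putting separately; repeat because the first iteration
    // includes one-time PDX type registration and connection setup.
    for (int i = 0; i < 5; i++) {
      long t0 = System.nanoTime();
      PdxInstance pdx = JSONFormatter.fromJSON(JSONTest.jsonDoc_2); // parse only
      long t1 = System.nanoTime();
      region.put("timing-key-" + i, pdx); // client-to-server put only
      long t2 = System.nanoTime();
      System.out.printf("iteration %d: fromJSON %.1f ms, put %.1f ms%n",
          i, (t1 - t0) / 1e6, (t2 - t1) / 1e6);
    }
    cache.close();
  }
}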