Thanks a lot Xiaojian Zhou for your clear explanation and detailed reply. This has helped a lot to proceed with my experiments.
Ankit. On Fri, Nov 27, 2020, 5:48 AM Xiaojian Zhou <zho...@vmware.com> wrote: > Ankit: > > I wrote some lucene sample code using your data and query. > > I also provided gfsh commands to create nested query. > > Note: I purposely provided 2 data to show the difference of query. > > package examples; > > import org.apache.geode.cache.Region; > import org.apache.geode.cache.client.ClientCache; > import org.apache.geode.cache.client.ClientCacheFactory; > import org.apache.geode.cache.client.ClientRegionShortcut; > import org.apache.geode.cache.lucene.LuceneQuery; > import org.apache.geode.cache.lucene.LuceneQueryException; > import org.apache.geode.cache.lucene.LuceneServiceProvider; > import org.apache.geode.cache.lucene.PageableLuceneQueryResults; > import org.apache.geode.cache.lucene.internal.LuceneIndexImpl; > import org.apache.geode.cache.lucene.internal.LuceneServiceImpl; > import org.apache.geode.pdx.JSONFormatter; > import org.apache.geode.pdx.PdxInstance; > > import java.io.IOException; > import java.util.HashSet; > import java.util.LinkedList; > import java.util.List; > import java.util.concurrent.TimeUnit; > import java.util.concurrent.atomic.AtomicInteger; > > public class JSONTest { > //NOTE: Below is truncated json, single json document can max contain an > array of col1...col30 (30 diff attributes) > // within data. > public final static String jsonDoc_2 = "{" + > "\"data\":[{" + > "\"col1\": {" + > "\"k11\": \"aaa\"," + > "\"k12\":true," + > "\"k13\": 1111," + > "\"k14\": \"2020-12-31:00:00:00\"" + > "}," + > "\"col2\":[{" + > "\"k21\": \"222222\"," + > "\"k22\": true" + > "}]" + > "}]" + > "}"; > public final static String jsonDoc_3 = "{" + > "\"data\":[{" + > "\"col1\": {" + > "\"k11\": \"bbb\"," + > "\"k12\":true," + > "\"k13\": 1111," + > "\"k14\": \"2020-12-31:00:00:00\"" + > "}," + > "\"col2\":[{" + > "\"k21\": \"333333\"," + > "\"k22\": true" + > "}]" + > "}]" + > "}"; > > //NOTE: Col1....col30 are mix of JSONObject ({}) and JSONArray ([]) as > shown above in jsonDoc_2; > > public final static String REGION_NAME = "REGION_NAME"; > > public static void main(String[] args) throws InterruptedException, > LuceneQueryException { > > //create client-cache > ClientCache cache = new > ClientCacheFactory().addPoolLocator("localhost", > 10334).setPdxReadSerialized(true).create(); > Region<String, PdxInstance> region = cache.<String, > > PdxInstance>createClientRegionFactory(ClientRegionShortcut.CACHING_PROXY) > .create(REGION_NAME); > > //store json document > region.put("key", JSONFormatter.fromJSON(jsonDoc_2)); > region.put("key3", JSONFormatter.fromJSON(jsonDoc_3)); > > LuceneServiceImpl service = (LuceneServiceImpl) > LuceneServiceProvider.get(cache); > LuceneIndexImpl index = (LuceneIndexImpl) > service.getIndex("jsonIndex", "REGION_NAME"); > if (index != null) { > service.waitUntilFlushed("jsonIndex", "REGION_NAME", 60000, > TimeUnit.MILLISECONDS); > } > > LuceneQuery query = > service.createLuceneQueryFactory().create("jsonIndex", "REGION_NAME", > "222222 OR 333333", "data.col2.k21"); > System.out.println("Query 222222 OR 333333"); > HashSet results = getResults(query, "REGION_NAME"); > > LuceneQuery query2 = > service.createLuceneQueryFactory().create("jsonIndex", "REGION_NAME", > "aaa OR xxx OR yyy", "data.col1.k11"); > System.out.println("Query aaa OR xxx OR yyy"); > results = getResults(query2, "REGION_NAME"); > > // server side: > // gfsh> start locator > // gfsh> start server --name=server50505 --server-port=50505 > // gfsh> create lucene index --name=jsonIndex --region=/REGION_NAME > --field=data.col2.k21,data.col1.k11 > // --serializer=org.apache.geode.cache.lucene.FlatFormatSerializer > // gfsh> create region --name=REGION_NAME --type=PARTITION > --redundant-copies=1 --total-num-buckets=61 > > // How to query json document like, > > // 1. select col2.k21, col1, col20 from /REGION_NAME where > //data.col2.k21 = '222222' OR data.col2.k21 = '333333' > > // 2. select col2.k21, col1.k11, col1 from /REGION_NAME where > // data.col1.k11 in ('aaa', 'xxx', 'yyy') > } > > private static HashSet getResults(LuceneQuery query, String regionName) > throws LuceneQueryException { > if (query == null) { > return null; > } > > PageableLuceneQueryResults<Object, Object> results = query.findPages(); > if (results.size() > 0) { > System.out.println("Search found " + results.size() + " results in " > + regionName + ", page size is " + query.getPageSize()); > } > > HashSet values = new HashSet<>(); > while (results.hasNext()) { > results.next().stream() > .forEach(struct -> { > Object value = struct.getValue(); > if (value instanceof PdxInstance) { > PdxInstance pdx = (PdxInstance) value; > String jsonString = JSONFormatter.toJSON(pdx); > List<PdxInstance> dataList = > (LinkedList<PdxInstance>)pdx.getField("data"); > for (PdxInstance data:dataList) { > Object colObject = > ((PdxInstance)data).getField("col1"); > System.out.println("col="+colObject); > } > System.out.println("Found a json object:" + > jsonString+":dataList="+dataList); > values.add(pdx); > } else { > System.out.println("key=" + struct.getKey() + ",data=" + > value); > values.add(value); > } > }); > } > System.out.println("Search found " + values.size() + " results in " + > regionName); > return values; > } > } > > Regards > Xiaojian > > On 11/23/20, 9:46 AM, "Xiaojian Zhou" <zho...@vmware.com> wrote: > > Ankit: > > Anil can provide you some sample code of OQL query on JSON. > > I will find some lucene sample code on JSON for you. > > Regards > Xiaojian > > On 11/23/20, 9:27 AM, "ankit Soni" <ankit.soni.ge...@gmail.com> wrote: > > Hi > I am looking for any means of querying (OQL/Lucene/API etc..?) > this stored > data. Looking for achieving this functionality first and second, > in a > performant way. > > I shared the OQL like syntax, to share my use-case easily and > based on some > reference found on doc. I am ok if a Lucene query or some other > way can > fetch the results. > > It will be of great help if you share the sample query/code > fetching this > data . > > Thanks > Ankit. > > > On Mon, 23 Nov 2020 at 22:43, Xiaojian Zhou <zho...@vmware.com> > wrote: > > > Anil: > > > > The syntax is OQL. But I understand they want to query JSON > object base on > > the criteria. > > > > On 11/23/20, 9:08 AM, "Anilkumar Gingade" <aging...@vmware.com> > wrote: > > > > Gester, Looking at the sample query, I Believe Ankit is > asking about > > OQL query not Lucene... > > > > -Anil. > > > > > > On 11/23/20, 9:02 AM, "Xiaojian Zhou" <zho...@vmware.com> > wrote: > > > > Ankit: > > > > Geode provided lucene query on json field. Your query > can be > > supported. > > > > > https://nam04.safelinks.protection.outlook.com/?url=https%3A%2F%2Fgemfire.docs.pivotal.io%2F910%2Fgeode%2Ftools_modules%2Flucene_integration.html&data=04%7C01%7Czhouxh%40vmware.com%7Cd1fa267b22a14927924108d88fd7b941%7Cb39138ca3cee4b4aa4d6cd83d9dd62f0%7C0%7C0%7C637417503982299596%7CUnknown%7CTWFpbGZsb3d8eyJWIjoiMC4wLjAwMDAiLCJQIjoiV2luMzIiLCJBTiI6Ik1haWwiLCJXVCI6Mn0%3D%7C1000&sdata=CWGBZA0ZgNJi7XST88NM8iJhR360sdLiG5LXCeECrO4%3D&reserved=0 > > > > However in above document, it did not provided a query > example on > > JSON object. > > > > I can give you some sample code to query on JSON. > > > > Regards > > Xiaojian Zhou > > > > On 11/22/20, 11:53 AM, "ankit Soni" < > ankit.soni.ge...@gmail.com> > > wrote: > > > > Hello geode-devs, please provide a guidance on this. > > > > Ankit. > > > > On Sat, 21 Nov 2020 at 10:23, ankit Soni < > > ankit.soni.ge...@gmail.com> wrote: > > > > > Hello team, > > > > > > I am *evaluating usage of Geode (1.12) with > storing JSON > > documents and > > > querying the same*. I am able to store the json > records > > successfully in > > > geode but seeking guidance on how to query them. > > > More details on code and sample json is, > > > > > > > > > *Sample client-code* > > > > > > import org.apache.geode.cache.client.ClientCache; > > > import > org.apache.geode.cache.client.ClientCacheFactory; > > > import > org.apache.geode.cache.client.ClientRegionShortcut; > > > import org.apache.geode.pdx.JSONFormatter; > > > import org.apache.geode.pdx.PdxInstance; > > > > > > public class MyTest { > > > > > > *//NOTE: Below is truncated json, single json > document > > can max contain an array of col1...col30 (30 diff attributes) > within data. * > > > public final static String jsonDoc_2 = "{" + > > > "\"data\":[{" + > > > "\"col1\": {" + > > > "\"k11\": > \"aaa\"," + > > > "\"k12\":true," + > > > "\"k13\": 1111," + > > > "\"k14\": > > \"2020-12-31:00:00:00\"" + > > > "}," + > > > "\"col2\":[{" + > > > "\"k21\": > \"222222\"," + > > > "\"k22\": true" + > > > "}]" + > > > "}]" + > > > "}"; > > > > > > * //NOTE: Col1....col30 are mix of JSONObject > ({}) and > > JSONArray ([]) as shown above in jsonDoc_2;* > > > > > > public static void main(String[] args){ > > > > > > //create client-cache > > > ClientCache cache = new > > ClientCacheFactory().addPoolLocator(LOCATOR_HOST, PORT).create(); > > > Region<String, PdxInstance> region = > cache.<String, > > > PdxInstance>createClientRegionFactory(ClientRegionShortcut.CACHING_PROXY) > > > .create(REGION_NAME); > > > > > > //store json document > > > region.put("key", > JSONFormatter.fromJSON(jsonDoc_2)); > > > > > > //How to query json document like, > > > > > > // 1. select col2.k21, col1, col20 from > /REGION_NAME > > where data.col2.k21 = '222222' OR data.col2.k21 = '333333' > > > > > > // 2. select col2.k21, col1.k11, col1 from > > /REGION_NAME where data.col1.k11 in ('aaa', 'xxx', 'yyy') > > > } > > > } > > > > > > *Server: Region-creation* > > > > > > gfsh> create region --name=REGION_NAME > --type=PARTITION > > --redundant-copies=1 --total-num-buckets=61 > > > > > > > > > *Setup: Distributed cluster of 3 nodes > > > * > > > > > > *My Observations/Problems* > > > - Put operation takes excessive time: > region.put("key", > > > JSONFormatter.fromJSON(jsonDoc_2)); - Fetching a > single > > record from () a > > > file and Storing in geode approx. takes . 3 secs > > > Is there any suggestions/configuration related > to > > JSONFormatter API or > > > other to optimize this...? > > > > > > *Looking forward to guidance on querying this JOSN > for above > > sample > > > queries.* > > > > > > *Thanks* > > > *Ankit* > > > > > > > > > > > > > >