pabrahamusa opened a new issue #6295:
URL: https://github.com/apache/incubator-pinot/issues/6295


   Hello,
   
   I currently have the following config for real-time ingestion, with the 
text index enabled. When I try to query the data with TEXT_MATCH, an error is 
thrown. Why is that? Is this the right way to enable text indexing?
   
   Error:
   `[
     {
       "errorCode": 200,
       "message": "QueryExecutionError:\njava.lang.NullPointerException\n\tat 
org.apache.pinot.core.operator.filter.TextMatchFilterOperator.getNextBlock(TextMatchFilterOperator.java:45)\n\tat
 
org.apache.pinot.core.operator.filter.TextMatchFilterOperator.getNextBlock(TextMatchFilterOperator.java:30)\n\tat
 
org.apache.pinot.core.operator.BaseOperator.nextBlock(BaseOperator.java:49)\n\tat
 
org.apache.pinot.core.operator.DocIdSetOperator.getNextBlock(DocIdSetOperator.java:62)\n\tat
 
org.apache.pinot.core.operator.DocIdSetOperator.getNextBlock(DocIdSetOperator.java:35)\n\tat
 
org.apache.pinot.core.operator.BaseOperator.nextBlock(BaseOperator.java:49)\n\tat
 
org.apache.pinot.core.operator.ProjectionOperator.getNextBlock(ProjectionOperator.java:57)\n\tat
 
org.apache.pinot.core.operator.ProjectionOperator.getNextBlock(ProjectionOperator.java:30)\n\tat
 
org.apache.pinot.core.operator.BaseOperator.nextBlock(BaseOperator.java:49)\n\tat
 org.apache.pinot.core.operator.transform.TransformOperator.getNextBlock(TransformOperator.java:92)\n\tat 
org.apache.pinot.core.operator.transform.TransformOperator.getNextBlock(TransformOperator.java:39)\n\tat
 
org.apache.pinot.core.operator.BaseOperator.nextBlock(BaseOperator.java:49)\n\tat
 
org.apache.pinot.core.operator.query.SelectionOnlyOperator.getNextBlock(SelectionOnlyOperator.java:77)\n\tat
 
org.apache.pinot.core.operator.query.SelectionOnlyOperator.getNextBlock(SelectionOnlyOperator.java:38)"
     }
   ]`
   
   
   Config
      ```
    {
         "tableName": "log",
         "tableType": "REALTIME",
         "ingestionConfig": {
         },
         "segmentsConfig": {
           "timeColumnName": "five_mins_epoch",
           "timeType": "MINUTES",
           "retentionTimeUnit": "DAYS",
           "retentionTimeValue": "7",
           "schemaName": "log",
           "replication": "2",
           "replicasPerPartition": "2",
           "segmentPushType": "APPEND",
           "completionConfig": {
              "completionMode": "DOWNLOAD"
           }
         },
         "tenants": {
          },
         "tableIndexConfig": {
           "loadMode": "MMAP",
           "sortedColumn": ["timemillis"],
           "fieldConfigList": [
               {
                 "name": "log",
                 "encodingType": "RAW",
                 "indexType": "TEXT",
                 "properties": {
                   "enableQueryCacheForTextIndex": "true",
                   "deriveNumDocsPerChunkForRawIndex": "true"
                   }
               },
               {
                 "name": "container_name",
                 "encodingType": "RAW",
                 "indexType": "TEXT",
                 "properties": {
                   "enableQueryCacheForTextIndex": "true",
                   "deriveNumDocsPerChunkForRawIndex": "true"
                   }
               },
               {
                 "name": "pod_name",
                 "encodingType": "RAW",
                 "indexType": "TEXT",
                 "properties": {
                   "enableQueryCacheForTextIndex": "true",
                   "deriveNumDocsPerChunkForRawIndex": "true"
                   }
               },
               {
                 "name": "namespace_name",
                 "encodingType": "RAW",
                 "indexType": "TEXT",
                 "properties": {
                   "enableQueryCacheForTextIndex": "true",
                   "deriveNumDocsPerChunkForRawIndex": "true"
                   }
               },
               {
                 "name": "host",
                 "encodingType": "RAW",
                 "indexType": "TEXT",
                 "properties": {
                   "enableQueryCacheForTextIndex": "true",
                   "deriveNumDocsPerChunkForRawIndex": "true"
                  }
               },
               {
                 "name": "cluster",
                 "encodingType": "RAW",
                 "indexType": "TEXT",
                 "properties": {
                   "enableQueryCacheForTextIndex": "true",
                   "deriveNumDocsPerChunkForRawIndex": "true"
                 }
               }],
           "streamConfigs": {
             "streamType": "kafka",
             "stream.kafka.consumer.type": "simple",
             "stream.kafka.topic.name": "all_logs",
             "stream.kafka.decoder.class.name": 
"org.apache.pinot.plugin.stream.kafka.KafkaJSONMessageDecoder",
             "stream.kafka.consumer.factory.class.name": 
"org.apache.pinot.plugin.stream.kafka20.KafkaConsumerFactory",
             "stream.kafka.zk.broker.url": 
"cp-zookeeper-headless.logging.svc.cluster.local:2181",
             "stream.kafka.broker.list": 
"cp-kafka-headless.logging.svc.cluster.local:9092",
             "realtime.segment.flush.threshold.time": "12h",
             "realtime.segment.flush.threshold.size": "100000",
             "stream.kafka.consumer.prop.auto.offset.reset": "smallest"
           }
         },
         "metadata": {
           "customConfigs": {}
         }
       }
   
   
     schema.json: 
       {
         "schemaName": "log",
         "dimensionFieldSpecs": [
           {
             "name": "log",
             "dataType": "STRING",
             "maxLength": "10000",
             "defaultNullValue": ""
           },
           {
             "name": "cluster",
             "dataType": "STRING",
             "defaultNullValue": ""
           },
           {
             "name": "container_name",
             "dataType": "STRING",
             "defaultNullValue": ""
           },
           {
             "name": "namespace_name",
             "dataType": "STRING",
             "defaultNullValue": ""
           },
           {
             "name": "pod_name",
             "dataType": "STRING",
             "defaultNullValue": ""
           },
           {
             "name": "host",
             "dataType": "STRING",
             "defaultNullValue": ""
           },
           {
             "name": "timemillis",
             "dataType": "LONG",
             "defaultNullValue": "100000"
           }
         ],
         "metricFieldSpecs": [],
         "timeFieldSpec": {
           "incomingGranularitySpec": {
             "timeType": "MINUTES",
             "dataType": "LONG",
             "timeFormat": "EPOCH",
             "name": "five_mins_epoch"
           },
           "outgoingGranularitySpec": {
             "dataType": "LONG",
             "timeType": "MINUTES",
             "timeFormat": "EPOCH",
             "name": "five_mins_epoch"
           }
         }
       }
   
   ```


----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org



---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@pinot.apache.org
For additional commands, e-mail: commits-h...@pinot.apache.org

Reply via email to