ryanruaneyougov opened a new issue, #8635:
URL: https://github.com/apache/pinot/issues/8635

   Using JSON ingestion, I have found that I can ingest every data type as a 
multi-valued dimension column, with the exception of BOOLEAN, TIMESTAMP, and 
BYTES. I believe that JSON_ARRAY isn't a valid type, but I wasn't sure about 
BYTES_ARRAY. If anyone is around and can shed some light on this, I would be 
very appreciative.
   
   For BYTES_ARRAY I get:
   ```
   java.lang.UnsupportedOperationException: Unsupported data type : BYTES
   ```
   
   For BOOLEAN_ARRAY I get:
   ```
   java.lang.ClassCastException: class [Z cannot be cast to class 
java.lang.Integer ([Z and java.lang.Integer are in module java.base of loader 
'bootstrap')
   ```
   
   For TIMESTAMP_ARRAY I get:
   ```
   java.lang.ClassCastException: class java.sql.Timestamp cannot be cast to 
class java.lang.Long (java.sql.Timestamp is in module java.sql of loader 
'platform'; java.lang.Long is in module java.base of loader 'bootstrap')
   ```
   
   Here is my schema:
   ```json
   {
     "schemaName": "scoreSheet",
     "dimensionFieldSpecs": [
       {
         "name": "handle",
         "dataType": "STRING"
       },
       {
         "name": "names",
         "dataType": "STRING",
         "singleValueField": false
       },
       {
         "name": "age",
         "dataType": "INT"
       },    
       {
         "name": "gameIds",
         "dataType": "INT",
         "singleValueField": false
       },
       {
         "name": "hasPlayed",
         "dataType": "BOOLEAN"
       },
       {
         "name": "gamesWon",
         "dataType": "BOOLEAN",
         "singleValueField": false
       },
       {
         "name": "dateOfBirth",
         "dataType": "TIMESTAMP"
       },
       {
         "name": "datesPlayed",
         "dataType": "TIMESTAMP",
         "singleValueField": false
       },
       {
         "name": "scores",
         "dataType": "LONG",
         "singleValueField": false
       },
       {
         "name": "handicapAdjustedScores",
         "dataType": "FLOAT",
         "singleValueField": false
       },
       {
         "name": "handicapAdjustedScores_highPrecision",
         "dataType": "FLOAT",
         "singleValueField": false
       },
       {
         "name": "extra",
         "dataType": "JSON"
       },
       {
         "name": "raw",
         "dataType": "BYTES"
       },
       {
         "name": "rawArray",
         "dataType": "BYTES",
         "singleValueField": false
       }
     ],
     "metricFieldSpecs": [
       {
         "name": "totalScore",
         "dataType": "LONG"
       },
       {
         "name": "avgScore",
         "dataType": "FLOAT"
       },
       {
         "name": "avgScore_highPrecision",
         "dataType": "DOUBLE"
       }
     ],
     "dateTimeFieldSpecs": [
       {
         "name": "dateOfFirstGame",
         "dataType": "LONG",
         "format": "1:MILLISECONDS:EPOCH",
         "granularity": "1:MILLISECONDS"
       }
     ]
   }
   
   ```
   Here is my table:
   ```json
   {
       "tableName": "scoreSheet",
       "tableType": "OFFLINE",
       "segmentsConfig": {
         "replication": 1
       },
       "tenants": {
         "broker":"DefaultTenant",
         "server":"DefaultTenant"
       },
       "tableIndexConfig": {
         "loadMode": "MMAP"
       },
       "ingestionConfig": {
         "batchIngestionConfig": {
           "segmentIngestionType": "APPEND",
           "segmentIngestionFrequency": "DAILY"
         }
       },
       "metadata": {}
   }
   
   ```
   
   Here is my ingestion job:
   ```yaml
   executionFrameworkSpec:
     name: 'standalone'
     segmentGenerationJobRunnerClassName: 'org.apache.pinot.plugin.ingestion.batch.standalone.SegmentGenerationJobRunner'
     segmentTarPushJobRunnerClassName: 'org.apache.pinot.plugin.ingestion.batch.standalone.SegmentTarPushJobRunner'
   jobType: SegmentCreationAndTarPush
   inputDirURI: '/db/score_sheet'
   # includeFileNamePattern: 'glob:**/data.csv'
   includeFileNamePattern: 'glob:**/data.json'
   outputDirURI: '/opt/pinot/data/score_sheet'
   overwriteOutput: true
   pinotFSSpecs:
     - scheme: file
       className: org.apache.pinot.spi.filesystem.LocalPinotFS
   recordReaderSpec:
     # dataFormat: 'csv'
     dataFormat: 'json'
     # className: 'org.apache.pinot.plugin.inputformat.csv.CSVRecordReader'
     className: 'org.apache.pinot.plugin.inputformat.json.JSONRecordReader'
     # configClassName: 'org.apache.pinot.plugin.inputformat.csv.CSVRecordReaderConfig'
   tableSpec:
     tableName: 'scoreSheet'
   pinotClusterSpecs:
     - controllerURI: 'http://localhost:9000'
   
   ```
   
   Here is my data:
   ```json
   [
     {
       "names": ["James", "Smith"],
       "gameIds": [1, 2, 3],
       "datesPlayed": ["2020-01-01 10:45:28", "2020-02-01 10:45:28", 
"2020-03-01 10:45:28"],
       "gamesWon": [true, false, true],
       "scores": [3, 6, 2],
       "handicapAdjustedScores": [2.1, 4.9, 3.2],
       "handicapAdjustedScores_highPrecision": [2.15, 4.99, 3.21],
       "rawArray": ["cd", "ef"],
       "handle": "Gladiator",
       "age": 10,
       "totalScore": 11,
       "avgScore": 3.6,
       "avgScore_highPrecision": 3.66,
       "hasPlayed": true,
       "dateOfBirth": "2011-01-01 00:00:00",
       "dateOfFirstGame": 1577875528000,
       "extra": "{\"a\": \"b\"}",
       "raw": "ab"
     },
     {
       "names": ["Giles", "Richie"],
       "gameIds": [],
       "datesPlayed":[] ,
       "gamesWon": [],
       "scores": [],
       "handicapAdjustedScores": [],
       "handicapAdjustedScores_highPrecision": [],
       "rawArray": [],
       "handle": "Thrumbar",
       "age": 30,
       "totalScore": 0,
       "avgScore": 0,
       "avgScore_highPrecision": 0,
       "hasPlayed": false,
       "dateOfBirth": 662688000000,
       "dateOfFirstGame": 1420070400001,
       "extra": {},
       "raw": ""
     }
   ]
   ```


-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: commits-unsubscr...@pinot.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@pinot.apache.org
For additional commands, e-mail: commits-h...@pinot.apache.org

Reply via email to