DixitThinkbiz opened a new issue, #15295:
URL: https://github.com/apache/pinot/issues/15295

   ### Description
   I am working on a proof-of-concept (POC) where I need to ingest data into 
Pinot and use S3 as deep storage. Although data ingestion from S3 into Pinot is 
successful, the segments are not being uploaded to S3 (deep storage).
   
   
   
   ### Table Schema
   ```json
   {
     "schemaName": "employee_attendance",
     "dimensionFieldSpecs": [
       { "name": "attendance_id", "dataType": "INT" },
       { "name": "employee_id", "dataType": "INT" }
     ],
     "dateTimeFieldSpecs": [
       {
         "name": "punch_time",
         "dataType": "TIMESTAMP",
         "format": "1:MILLISECONDS:EPOCH",
         "granularity": "1:MILLISECONDS"
       }
     ],
     "primaryKeyColumns": ["attendance_id"]
   }
   ```
   
   ### Table Configuration
   ```json
   {
     "tableName": "employee_attendance",
     "tableType": "OFFLINE",
     "segmentsConfig": {
       "timeColumnName": "punch_time",
       "schemaName": "employee_attendance",
       "replication": "1"
     },
     "tableIndexConfig": {
       "starTreeIndexConfigs": [
         {
           "dimensionsSplitOrder": ["employee_id"],
           "skipStarNodeCreationForDimensions": [],
           "maxLeafRecords": "24",
           "aggregationConfigs": [
             { "columnName": "employee_id", "aggregationFunction": "COUNT" }
           ]
         }
       ]
     },
     "routing": { "instanceSelectorType": "strictReplicaGroup" },
     "tenants": {
       "broker": "DefaultTenant",
       "server": "DefaultTenant"
     },
     "ingestionConfig": {
       "batchIngestionConfig": {
         "segmentIngestionType": "APPEND",
         "segmentIngestionFrequency": "DAILY",
         "batchConfigMaps": [
           {
             "inputDirURI": "s3://bucket-name/",
             "includeFileNamePattern": "glob:**/*.json",
             "excludeFileNamePattern": "glob:**/*.tmp",
             "inputFormat": "json",
             "input.fs.className": "org.apache.pinot.plugin.filesystem.S3PinotFS",
             "input.fs.prop.region": "ap-northeast-1",
             "input.fs.prop.accessKey": "****",
             "input.fs.prop.secretKey": "****"
           }
         ],
         "segmentNameSpec": {},
         "pushSpec": {}
       }
     },
     "task": {
       "taskTypeConfigsMap": {
         "SegmentGenerationAndPushTask": {
           "schedule": "0 */1 * * * ?",
           "tableMaxNumTasks": "10"
         }
       }
     },
     "metadata": {}
   }
   ```
   
   ### Controller Configuration (controller.conf)
   ```properties
   # Pinot Role
   pinot.service.role=CONTROLLER
   
   # Pinot Cluster name
   pinot.cluster.name=pinot-quickstart
   
   # Pinot Zookeeper Server
   pinot.zk.server=localhost:2181
   
   # Use hostname as Pinot Instance ID
   pinot.set.instance.id.to.hostname=true
   
   # Pinot Controller Port
   controller.port=9000
   controller.zk.str=pinot-zookeeper:2181
   controller.vip.host=127.0.0.1
   controller.vip.port=9000
   
   controller.task.scheduler.enabled=true
   controller.local.temp.dir=/var/pinot/controller/data
   
   # Deep storage configuration
   pinot.controller.storage.factory.class.s3=org.apache.pinot.plugin.filesystem.S3PinotFS
   pinot.controller.storage.factory.s3.disableAcl=false
   pinot.controller.storage.factory.s3.region=ap-northeast-1
   controller.data.dir=s3://bucket-name/
   
   pinot.controller.segment.fetcher.protocols=file,http,s3
   pinot.controller.segment.fetcher.s3.class=org.apache.pinot.common.utils.fetcher.PinotFSSegmentFetcher
   
   pinot.controller.storage.factory.s3.accessKey=****
   pinot.controller.storage.factory.s3.secretKey=****
   ```
   
   ### Server Configuration (server.conf)
   ```properties
   # Pinot Role
   pinot.service.role=SERVER
   
   # Pinot Cluster name
   pinot.cluster.name=pinot-quickstart
   
   # Pinot Zookeeper Server
   pinot.zk.server=localhost:2181
   pinot.set.instance.id.to.hostname=true
   
   # Pinot Server Ports
   pinot.server.netty.port=8098
   pinot.server.adminapi.port=8097
   
   # Data directories and deep storage
   pinot.server.instance.dataDir=/tmp/pinot/data/server/index
   pinot.server.instance.segmentTarDir=/tmp/pinot/data/server/segmentTar
   pinot.server.segment.store.uri=s3://bucket-name/
   pinot.server.storage.factory.s3.disableAcl=false
   pinot.server.storage.factory.class.s3=org.apache.pinot.plugin.filesystem.S3PinotFS
   pinot.server.storage.factory.s3.region=ap-northeast-1
   pinot.server.segment.fetcher.protocols=file,http,s3
   pinot.server.segment.fetcher.s3.class=org.apache.pinot.common.utils.fetcher.PinotFSSegmentFetcher
   pinot.server.storage.factory.s3.accessKey=****
   pinot.server.storage.factory.s3.secretKey=****
   ```
   
   ### Minion Configuration (minion.conf)
   ```properties
   pinot.set.instance.id.to.hostname=true
   pinot.minion.storage.factory.class.s3=org.apache.pinot.plugin.filesystem.S3PinotFS
   pinot.minion.storage.factory.s3.region=us-east-1
   pinot.minion.segment.fetcher.protocols=file,http,s3
   pinot.minion.segment.fetcher.s3.class=org.apache.pinot.common.utils.fetcher.PinotFSSegmentFetcher
   ```
   
   ### Request for Assistance
   Any insights or suggestions on why the segments might not be uploading to S3 
(deep storage) would be greatly appreciated.  
     
   Please let me know if further logs or configuration details are needed.


-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: commits-unsubscr...@pinot.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@pinot.apache.org
For additional commands, e-mail: commits-h...@pinot.apache.org

Reply via email to