hugosjoberg opened a new issue, #11940: URL: https://github.com/apache/pinot/issues/11940
Hi, I'm trying to import a CSV, I'm following the guide here: https://docs.pinot.apache.org/basics/getting-started/pushing-your-data-to-pinot I run Pinot using docker-compose on M2-Apple chip `docker-compose.yaml` ```yaml version: '3.7' services: zookeeper: image: zookeeper hostname: zookeeper container_name: manual-zookeeper ports: - "2181:2181" environment: ZOOKEEPER_CLIENT_PORT: 2181 ZOOKEEPER_TICK_TIME: 2000 pinot-controller: image: apachepinot/pinot:1.0.0 command: "StartController -zkAddress manual-zookeeper:2181" container_name: "manual-pinot-controller" volumes: - ./config:/config - ./data:/data restart: unless-stopped ports: - "9000:9000" depends_on: - zookeeper pinot-broker: image: apachepinot/pinot:1.0.0 command: "StartBroker -zkAddress manual-zookeeper:2181" restart: unless-stopped container_name: "manual-pinot-broker" volumes: - ./config:/config - ./data:/data ports: - "8099:8099" depends_on: - pinot-controller pinot-server: image: apachepinot/pinot:1.0.0 command: "StartServer -zkAddress manual-zookeeper:2181" restart: unless-stopped container_name: "manual-pinot-server" volumes: - ./config:/config - ./data:/data depends_on: - pinot-broker ``` `config/schema.json` ```json { "schemaName": "transcript", "dimensionFieldSpecs": [ { "name": "studentID", "dataType": "INT" }, { "name": "firstName", "dataType": "STRING" }, { "name": "lastName", "dataType": "STRING" }, { "name": "gender", "dataType": "STRING" }, { "name": "subject", "dataType": "STRING" } ], "metricFieldSpecs": [ { "name": "score", "dataType": "FLOAT" } ], "dateTimeFieldSpecs": [ { "name": "timestampInEpoch", "dataType": "LONG", "format": "1:MILLISECONDS:EPOCH", "granularity": "1:MILLISECONDS" } ] } ``` `table.json` ```json { "tableName": "transcript", "segmentsConfig": { "timeColumnName": "timestampInEpoch", "timeType": "MILLISECONDS", "replication": "1", "schemaName": "transcript" }, "tableIndexConfig": { "invertedIndexColumns": [], "loadMode": "MMAP" }, "tenants": { "broker": "DefaultTenant", 
"server": "DefaultTenant" }, "tableType": "OFFLINE", "metadata": {} } ``` `job-spec.yml` ```yml executionFrameworkSpec: name: 'standalone' segmentGenerationJobRunnerClassName: 'org.apache.pinot.plugin.ingestion.batch.standalone.SegmentGenerationJobRunner' segmentTarPushJobRunnerClassName: 'org.apache.pinot.plugin.ingestion.batch.standalone.SegmentTarPushJobRunner' segmentUriPushJobRunnerClassName: 'org.apache.pinot.plugin.ingestion.batch.standalone.SegmentUriPushJobRunner' jobType: SegmentCreationAndTarPush inputDirURI: '/data/' includeFileNamePattern: 'glob:**/*.csv' outputDirURI: '/tmp/pinot-quick-start/segments/' overwriteOutput: true pinotFSSpecs: - scheme: file className: org.apache.pinot.spi.filesystem.LocalPinotFS recordReaderSpec: dataFormat: 'csv' className: 'org.apache.pinot.plugin.inputformat.csv.CSVRecordReader' configClassName: 'org.apache.pinot.plugin.inputformat.csv.CSVRecordReaderConfig' tableSpec: tableName: 'transcript' schemaURI: 'http://manual-pinot-controller:9000/tables/transcript/schema' tableConfigURI: 'http://manual-pinot-controller:9000/tables/transcript' pinotClusterSpecs: - controllerURI: 'http://manual-pinot-controller:9000' ``` `data/transcript.csv` ```csv studentID,firstName,lastName,gender,subject,score,timestampInEpoch 200,Lucy,Smith,Female,Maths,3.8,1570863600000 200,Lucy,Smith,Female,English,3.5,1571036400000 201,Bob,King,Male,Maths,3.2,1571900400000 202,Nick,Young,Male,Physics,3.6,1572418800000 ``` I first apply the schema: ```bash docker exec \ -it manual-pinot-controller bin/pinot-admin.sh AddTable \ -tableConfigFile /config/table.json \ -schemaFile /config/schema.json \ -exec ``` Then I execute the job-spec ```bash docker exec \ -it manual-pinot-controller bin/pinot-admin.sh LaunchDataIngestionJob \ -jobSpecFile /config/job-spec.yml ``` Then I get this error message and java stack-trace which we all love 😢: ``` 2023/11/02 23:02:45.260 ERROR [LaunchDataIngestionJobCommand] [main] Got exception to kick off standalone data 
ingestion job - java.lang.RuntimeException: Caught exception during running - org.apache.pinot.plugin.ingestion.batch.standalone.SegmentGenerationJobRunner at org.apache.pinot.spi.ingestion.batch.IngestionJobLauncher.kickoffIngestionJob(IngestionJobLauncher.java:152) ~[pinot-all-1.0.0-jar-with-dependencies.jar:1.0.0-b6bdf6c9686b286a149d2d1aea4a385ee98f3e79] at org.apache.pinot.spi.ingestion.batch.IngestionJobLauncher.runIngestionJob(IngestionJobLauncher.java:121) ~[pinot-all-1.0.0-jar-with-dependencies.jar:1.0.0-b6bdf6c9686b286a149d2d1aea4a385ee98f3e79] at org.apache.pinot.tools.admin.command.LaunchDataIngestionJobCommand.execute(LaunchDataIngestionJobCommand.java:130) [pinot-all-1.0.0-jar-with-dependencies.jar:1.0.0-b6bdf6c9686b286a149d2d1aea4a385ee98f3e79] at org.apache.pinot.tools.Command.call(Command.java:33) [pinot-all-1.0.0-jar-with-dependencies.jar:1.0.0-b6bdf6c9686b286a149d2d1aea4a385ee98f3e79] at org.apache.pinot.tools.Command.call(Command.java:29) [pinot-all-1.0.0-jar-with-dependencies.jar:1.0.0-b6bdf6c9686b286a149d2d1aea4a385ee98f3e79] at picocli.CommandLine.executeUserObject(CommandLine.java:1953) [pinot-all-1.0.0-jar-with-dependencies.jar:1.0.0-b6bdf6c9686b286a149d2d1aea4a385ee98f3e79] at picocli.CommandLine.access$1300(CommandLine.java:145) [pinot-all-1.0.0-jar-with-dependencies.jar:1.0.0-b6bdf6c9686b286a149d2d1aea4a385ee98f3e79] at picocli.CommandLine$RunLast.executeUserObjectOfLastSubcommandWithSameParent(CommandLine.java:2352) [pinot-all-1.0.0-jar-with-dependencies.jar:1.0.0-b6bdf6c9686b286a149d2d1aea4a385ee98f3e79] at picocli.CommandLine$RunLast.handle(CommandLine.java:2346) [pinot-all-1.0.0-jar-with-dependencies.jar:1.0.0-b6bdf6c9686b286a149d2d1aea4a385ee98f3e79] at picocli.CommandLine$RunLast.handle(CommandLine.java:2311) [pinot-all-1.0.0-jar-with-dependencies.jar:1.0.0-b6bdf6c9686b286a149d2d1aea4a385ee98f3e79] at picocli.CommandLine$AbstractParseResultHandler.execute(CommandLine.java:2179) 
[pinot-all-1.0.0-jar-with-dependencies.jar:1.0.0-b6bdf6c9686b286a149d2d1aea4a385ee98f3e79] at picocli.CommandLine.execute(CommandLine.java:2078) [pinot-all-1.0.0-jar-with-dependencies.jar:1.0.0-b6bdf6c9686b286a149d2d1aea4a385ee98f3e79] at org.apache.pinot.tools.admin.PinotAdministrator.execute(PinotAdministrator.java:171) [pinot-all-1.0.0-jar-with-dependencies.jar:1.0.0-b6bdf6c9686b286a149d2d1aea4a385ee98f3e79] at org.apache.pinot.tools.admin.PinotAdministrator.main(PinotAdministrator.java:202) [pinot-all-1.0.0-jar-with-dependencies.jar:1.0.0-b6bdf6c9686b286a149d2d1aea4a385ee98f3e79] Caused by: java.nio.file.NoSuchFileException: /tmp/pinot-quick-start/rawdata at sun.nio.fs.UnixException.translateToIOException(UnixException.java:92) ~[?:?] at sun.nio.fs.UnixException.rethrowAsIOException(UnixException.java:111) ~[?:?] at sun.nio.fs.UnixException.rethrowAsIOException(UnixException.java:116) ~[?:?] at sun.nio.fs.UnixFileAttributeViews$Basic.readAttributes(UnixFileAttributeViews.java:55) ~[?:?] at sun.nio.fs.UnixFileSystemProvider.readAttributes(UnixFileSystemProvider.java:149) ~[?:?] at sun.nio.fs.LinuxFileSystemProvider.readAttributes(LinuxFileSystemProvider.java:99) ~[?:?] at java.nio.file.Files.readAttributes(Files.java:1764) ~[?:?] at java.nio.file.FileTreeWalker.getAttributes(FileTreeWalker.java:219) ~[?:?] at java.nio.file.FileTreeWalker.visit(FileTreeWalker.java:276) ~[?:?] at java.nio.file.FileTreeWalker.walk(FileTreeWalker.java:322) ~[?:?] at java.nio.file.FileTreeIterator.<init>(FileTreeIterator.java:71) ~[?:?] at java.nio.file.Files.walk(Files.java:3825) ~[?:?] at java.nio.file.Files.walk(Files.java:3879) ~[?:?] 
at org.apache.pinot.spi.filesystem.LocalPinotFS.listFiles(LocalPinotFS.java:115) ~[pinot-all-1.0.0-jar-with-dependencies.jar:1.0.0-b6bdf6c9686b286a149d2d1aea4a385ee98f3e79] at org.apache.pinot.spi.filesystem.NoClosePinotFS.listFiles(NoClosePinotFS.java:86) ~[pinot-all-1.0.0-jar-with-dependencies.jar:1.0.0-b6bdf6c9686b286a149d2d1aea4a385ee98f3e79] at org.apache.pinot.common.segment.generation.SegmentGenerationUtils.listMatchedFilesWithRecursiveOption(SegmentGenerationUtils.java:259) ~[pinot-all-1.0.0-jar-with-dependencies.jar:1.0.0-b6bdf6c9686b286a149d2d1aea4a385ee98f3e79] at org.apache.pinot.plugin.ingestion.batch.standalone.SegmentGenerationJobRunner.run(SegmentGenerationJobRunner.java:177) ~[pinot-batch-ingestion-standalone-1.0.0-shaded.jar:1.0.0-b6bdf6c9686b286a149d2d1aea4a385ee98f3e79] at org.apache.pinot.spi.ingestion.batch.IngestionJobLauncher.kickoffIngestionJob(IngestionJobLauncher.java:150) ~[pinot-all-1.0.0-jar-with-dependencies.jar:1.0.0-b6bdf6c9686b286a149d2d1aea4a385ee98f3e79] ... 
13 more java.lang.RuntimeException: Caught exception during running - org.apache.pinot.plugin.ingestion.batch.standalone.SegmentGenerationJobRunner at org.apache.pinot.spi.ingestion.batch.IngestionJobLauncher.kickoffIngestionJob(IngestionJobLauncher.java:152) at org.apache.pinot.spi.ingestion.batch.IngestionJobLauncher.runIngestionJob(IngestionJobLauncher.java:121) at org.apache.pinot.tools.admin.command.LaunchDataIngestionJobCommand.execute(LaunchDataIngestionJobCommand.java:130) at org.apache.pinot.tools.Command.call(Command.java:33) at org.apache.pinot.tools.Command.call(Command.java:29) at picocli.CommandLine.executeUserObject(CommandLine.java:1953) at picocli.CommandLine.access$1300(CommandLine.java:145) at picocli.CommandLine$RunLast.executeUserObjectOfLastSubcommandWithSameParent(CommandLine.java:2352) at picocli.CommandLine$RunLast.handle(CommandLine.java:2346) at picocli.CommandLine$RunLast.handle(CommandLine.java:2311) at picocli.CommandLine$AbstractParseResultHandler.execute(CommandLine.java:2179) at picocli.CommandLine.execute(CommandLine.java:2078) at org.apache.pinot.tools.admin.PinotAdministrator.execute(PinotAdministrator.java:171) at org.apache.pinot.tools.admin.PinotAdministrator.main(PinotAdministrator.java:202) Caused by: java.nio.file.NoSuchFileException: /tmp/pinot-quick-start/rawdata at java.base/sun.nio.fs.UnixException.translateToIOException(UnixException.java:92) at java.base/sun.nio.fs.UnixException.rethrowAsIOException(UnixException.java:111) at java.base/sun.nio.fs.UnixException.rethrowAsIOException(UnixException.java:116) at java.base/sun.nio.fs.UnixFileAttributeViews$Basic.readAttributes(UnixFileAttributeViews.java:55) at java.base/sun.nio.fs.UnixFileSystemProvider.readAttributes(UnixFileSystemProvider.java:149) at java.base/sun.nio.fs.LinuxFileSystemProvider.readAttributes(LinuxFileSystemProvider.java:99) at java.base/java.nio.file.Files.readAttributes(Files.java:1764) at 
java.base/java.nio.file.FileTreeWalker.getAttributes(FileTreeWalker.java:219) at java.base/java.nio.file.FileTreeWalker.visit(FileTreeWalker.java:276) at java.base/java.nio.file.FileTreeWalker.walk(FileTreeWalker.java:322) at java.base/java.nio.file.FileTreeIterator.<init>(FileTreeIterator.java:71) at java.base/java.nio.file.Files.walk(Files.java:3825) at java.base/java.nio.file.Files.walk(Files.java:3879) at org.apache.pinot.spi.filesystem.LocalPinotFS.listFiles(LocalPinotFS.java:115) at org.apache.pinot.spi.filesystem.NoClosePinotFS.listFiles(NoClosePinotFS.java:86) at org.apache.pinot.common.segment.generation.SegmentGenerationUtils.listMatchedFilesWithRecursiveOption(SegmentGenerationUtils.java:259) at org.apache.pinot.plugin.ingestion.batch.standalone.SegmentGenerationJobRunner.run(SegmentGenerationJobRunner.java:177) at org.apache.pinot.spi.ingestion.batch.IngestionJobLauncher.kickoffIngestionJob(IngestionJobLauncher.java:150) ... 13 more ``` I followed several guides and tried `v1.1.0`, but I keep seeing the error. -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: commits-unsubscr...@pinot.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@pinot.apache.org For additional commands, e-mail: commits-h...@pinot.apache.org