This is an automated email from the ASF dual-hosted git repository. yashmayya pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/pinot.git
The following commit(s) were added to refs/heads/master by this push: new 76e1506336 Specify start index of file name in GenerateDataCommand (#14573) 76e1506336 is described below commit 76e1506336e0ab9e02a4e82e99fe60df5ac54ec3 Author: Rajat Venkatesh <1638298+vra...@users.noreply.github.com> AuthorDate: Mon Dec 2 16:36:05 2024 +0530 Specify start index of file name in GenerateDataCommand (#14573) --- .../recommender/data/DataGenerationHelpers.java | 38 ++++++++++++++++++---- .../recommender/data/writer/AvroWriter.java | 3 +- .../recommender/data/writer/AvroWriterSpec.java | 4 +-- .../recommender/data/writer/FileWriter.java | 2 +- .../recommender/data/writer/FileWriterSpec.java | 5 +-- .../recommender/data/writer/WriterSpec.java | 8 ++++- .../tools/admin/command/GenerateDataCommand.java | 9 +++-- 7 files changed, 53 insertions(+), 16 deletions(-) diff --git a/pinot-controller/src/main/java/org/apache/pinot/controller/recommender/data/DataGenerationHelpers.java b/pinot-controller/src/main/java/org/apache/pinot/controller/recommender/data/DataGenerationHelpers.java index 0ef07b96ed..1226e237e4 100644 --- a/pinot-controller/src/main/java/org/apache/pinot/controller/recommender/data/DataGenerationHelpers.java +++ b/pinot-controller/src/main/java/org/apache/pinot/controller/recommender/data/DataGenerationHelpers.java @@ -48,24 +48,50 @@ public final class DataGenerationHelpers { private static final Logger LOGGER = LoggerFactory.getLogger(DataGenerationHelpers.class); + public static final int DEFAULT_FILE_INDEX = 0; + + public static void generateAvro(DataGenerator generator, long totalDocs, int numFiles, String outDir, + boolean isOverrideOutDir) + throws Exception { + generateAvro(generator, totalDocs, numFiles, outDir, isOverrideOutDir, DEFAULT_FILE_INDEX); + } + + public static void generateCsv(DataGenerator generator, long totalDocs, int numFiles, String outDir, + boolean isOverrideOutDir) + throws Exception { + generateCsv(generator, totalDocs, numFiles, outDir, isOverrideOutDir, DEFAULT_FILE_INDEX); + } + + public static void generateJson(DataGenerator generator, long totalDocs, int numFiles, String outDir, + boolean isOverrideOutDir) + throws Exception { + generateJson(generator, totalDocs, numFiles, outDir, isOverrideOutDir, DEFAULT_FILE_INDEX); + } + public static void generateAvro(DataGenerator generator, long totalDocs, int numFiles, String outDir, - boolean isOverrideOutDir) throws Exception { + boolean isOverrideOutDir, int fileIndex) + throws Exception { AvroWriter avroWriter = new AvroWriter(); - avroWriter.init(new AvroWriterSpec(generator, handleOutDir(outDir, isOverrideOutDir), totalDocs, numFiles)); + avroWriter.init( + new AvroWriterSpec(generator, handleOutDir(outDir, isOverrideOutDir), totalDocs, numFiles, fileIndex)); avroWriter.write(); } public static void generateCsv(DataGenerator generator, long totalDocs, int numFiles, String outDir, - boolean isOverrideOutDir) throws Exception { + boolean isOverrideOutDir, int fileIndex) + throws Exception { CsvWriter csvWriter = new CsvWriter(); - csvWriter.init(new FileWriterSpec(generator, handleOutDir(outDir, isOverrideOutDir), totalDocs, numFiles)); + csvWriter.init( + new FileWriterSpec(generator, handleOutDir(outDir, isOverrideOutDir), totalDocs, numFiles, fileIndex)); csvWriter.write(); } public static void generateJson(DataGenerator generator, long totalDocs, int numFiles, String outDir, - boolean isOverrideOutDir) throws Exception { + boolean isOverrideOutDir, int fileIndex) + throws Exception { JsonWriter jsonWriter = new JsonWriter(); - jsonWriter.init(new FileWriterSpec(generator, handleOutDir(outDir, isOverrideOutDir), totalDocs, numFiles)); + jsonWriter.init( + new FileWriterSpec(generator, handleOutDir(outDir, isOverrideOutDir), totalDocs, numFiles, fileIndex)); jsonWriter.write(); } diff --git a/pinot-controller/src/main/java/org/apache/pinot/controller/recommender/data/writer/AvroWriter.java b/pinot-controller/src/main/java/org/apache/pinot/controller/recommender/data/writer/AvroWriter.java index 1295b8991d..5b82bf1024 100644 --- a/pinot-controller/src/main/java/org/apache/pinot/controller/recommender/data/writer/AvroWriter.java +++ b/pinot-controller/src/main/java/org/apache/pinot/controller/recommender/data/writer/AvroWriter.java @@ -67,7 +67,8 @@ public class AvroWriter implements Writer { final int numPerFiles = (int) (_spec.getTotalDocs() / _spec.getNumFiles()); for (int i = 0; i < _spec.getNumFiles(); i++) { try (AvroRecordAppender appender = new AvroRecordAppender( - new File(_spec.getBaseDir(), "part-" + i + ".avro"), getAvroSchema(_spec.getSchema()))) { + new File(_spec.getBaseDir(), "part-" + (i + _spec.getFileIndex()) + ".avro"), + getAvroSchema(_spec.getSchema()))) { for (int j = 0; j < numPerFiles; j++) { appender.append(_spec.getGenerator().nextRow()); } diff --git a/pinot-controller/src/main/java/org/apache/pinot/controller/recommender/data/writer/AvroWriterSpec.java b/pinot-controller/src/main/java/org/apache/pinot/controller/recommender/data/writer/AvroWriterSpec.java index a9ce271130..09b822c10e 100644 --- a/pinot-controller/src/main/java/org/apache/pinot/controller/recommender/data/writer/AvroWriterSpec.java +++ b/pinot-controller/src/main/java/org/apache/pinot/controller/recommender/data/writer/AvroWriterSpec.java @@ -30,8 +30,8 @@ public class AvroWriterSpec extends WriterSpec { private final int _numFiles; private final Schema _schema; - public AvroWriterSpec(DataGenerator generator, File baseDir, long totalDocs, int numFiles) { - super(generator); + public AvroWriterSpec(DataGenerator generator, File baseDir, long totalDocs, int numFiles, int fileIndex) { + super(generator, fileIndex); _baseDir = baseDir; _totalDocs = totalDocs; _numFiles = numFiles; diff --git a/pinot-controller/src/main/java/org/apache/pinot/controller/recommender/data/writer/FileWriter.java b/pinot-controller/src/main/java/org/apache/pinot/controller/recommender/data/writer/FileWriter.java index f02f245ced..b750c3a59b 100644 --- a/pinot-controller/src/main/java/org/apache/pinot/controller/recommender/data/writer/FileWriter.java +++ b/pinot-controller/src/main/java/org/apache/pinot/controller/recommender/data/writer/FileWriter.java @@ -41,7 +41,7 @@ public abstract class FileWriter implements Writer { final long docsPerFile = (long) Math.ceil((double) totalDocs / _spec.getNumFiles()); final String extension = getExtension() == null ? "" : String.format(".%s", getExtension()); long ingestedDocs = 0; - int fileIndex = 0; + int fileIndex = _spec.getFileIndex(); while (ingestedDocs < totalDocs) { try (java.io.FileWriter writer = new java.io.FileWriter(new File(_spec.getBaseDir(), String.format("output_%d%s", fileIndex, extension)))) { diff --git a/pinot-controller/src/main/java/org/apache/pinot/controller/recommender/data/writer/FileWriterSpec.java b/pinot-controller/src/main/java/org/apache/pinot/controller/recommender/data/writer/FileWriterSpec.java index a7ef8cd496..01bd407ea9 100644 --- a/pinot-controller/src/main/java/org/apache/pinot/controller/recommender/data/writer/FileWriterSpec.java +++ b/pinot-controller/src/main/java/org/apache/pinot/controller/recommender/data/writer/FileWriterSpec.java @@ -26,8 +26,9 @@ public class FileWriterSpec extends WriterSpec { private final File _baseDir; private final long _totalDocs; private final int _numFiles; - public FileWriterSpec(DataGenerator generator, File baseDir, long totalDocs, int numFiles) { - super(generator); + + public FileWriterSpec(DataGenerator generator, File baseDir, long totalDocs, int numFiles, int fileIndex) { + super(generator, fileIndex); _baseDir = baseDir; _totalDocs = totalDocs; _numFiles = numFiles; diff --git a/pinot-controller/src/main/java/org/apache/pinot/controller/recommender/data/writer/WriterSpec.java b/pinot-controller/src/main/java/org/apache/pinot/controller/recommender/data/writer/WriterSpec.java index 0a82601fa7..3393227e11 100644 --- a/pinot-controller/src/main/java/org/apache/pinot/controller/recommender/data/writer/WriterSpec.java +++ b/pinot-controller/src/main/java/org/apache/pinot/controller/recommender/data/writer/WriterSpec.java @@ -23,12 +23,18 @@ import org.apache.pinot.controller.recommender.data.generator.DataGenerator; public class WriterSpec { private final DataGenerator _generator; + private final int _fileIndex; - public WriterSpec(DataGenerator generator) { + public WriterSpec(DataGenerator generator, int fileIndex) { _generator = generator; + _fileIndex = fileIndex; } public DataGenerator getGenerator() { return _generator; } + + public int getFileIndex() { + return _fileIndex; + } } diff --git a/pinot-tools/src/main/java/org/apache/pinot/tools/admin/command/GenerateDataCommand.java b/pinot-tools/src/main/java/org/apache/pinot/tools/admin/command/GenerateDataCommand.java index efcc130188..a7f556509f 100644 --- a/pinot-tools/src/main/java/org/apache/pinot/tools/admin/command/GenerateDataCommand.java +++ b/pinot-tools/src/main/java/org/apache/pinot/tools/admin/command/GenerateDataCommand.java @@ -73,6 +73,9 @@ public class GenerateDataCommand extends AbstractBaseAdminCommand implements Com description = "Output format ('AVRO' or 'CSV' or 'JSON').") private String _format = FORMAT_AVRO; + @CommandLine.Option(names = {"-fileIndex"}, help = true, description = "Starting file index") + private int _fileIndex = DataGenerationHelpers.DEFAULT_FILE_INDEX; + public void init(int numRecords, int numFiles, String schemaFile, String outDir) { _numRecords = numRecords; _numFiles = numFiles; @@ -120,11 +123,11 @@ public class GenerateDataCommand extends AbstractBaseAdminCommand implements Com gen.init(spec); if (FORMAT_AVRO.equalsIgnoreCase(_format)) { - DataGenerationHelpers.generateAvro(gen, _numRecords, _numFiles, _outDir, _overwrite); + DataGenerationHelpers.generateAvro(gen, _numRecords, _numFiles, _outDir, _overwrite, _fileIndex); } else if (FORMAT_CSV.equalsIgnoreCase(_format)) { - DataGenerationHelpers.generateCsv(gen, _numRecords, _numFiles, _outDir, _overwrite); + DataGenerationHelpers.generateCsv(gen, _numRecords, _numFiles, _outDir, _overwrite, _fileIndex); } else if (FORMAT_JSON.equalsIgnoreCase(_format)) { - DataGenerationHelpers.generateJson(gen, _numRecords, _numFiles, _outDir, _overwrite); + DataGenerationHelpers.generateJson(gen, _numRecords, _numFiles, _outDir, _overwrite, _fileIndex); } else { throw new IllegalArgumentException(String.format("Invalid output format '%s'", _format)); } --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@pinot.apache.org For additional commands, e-mail: commits-h...@pinot.apache.org