This is an automated email from the ASF dual-hosted git repository.

yashmayya pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/pinot.git


The following commit(s) were added to refs/heads/master by this push:
     new 76e1506336 Specify start index of file name in GenerateDataCommand (#14573)
76e1506336 is described below

commit 76e1506336e0ab9e02a4e82e99fe60df5ac54ec3
Author: Rajat Venkatesh <1638298+vra...@users.noreply.github.com>
AuthorDate: Mon Dec 2 16:36:05 2024 +0530

    Specify start index of file name in GenerateDataCommand (#14573)
---
 .../recommender/data/DataGenerationHelpers.java    | 38 ++++++++++++++++++----
 .../recommender/data/writer/AvroWriter.java        |  3 +-
 .../recommender/data/writer/AvroWriterSpec.java    |  4 +--
 .../recommender/data/writer/FileWriter.java        |  2 +-
 .../recommender/data/writer/FileWriterSpec.java    |  5 +--
 .../recommender/data/writer/WriterSpec.java        |  8 ++++-
 .../tools/admin/command/GenerateDataCommand.java   |  9 +++--
 7 files changed, 53 insertions(+), 16 deletions(-)

diff --git a/pinot-controller/src/main/java/org/apache/pinot/controller/recommender/data/DataGenerationHelpers.java b/pinot-controller/src/main/java/org/apache/pinot/controller/recommender/data/DataGenerationHelpers.java
index 0ef07b96ed..1226e237e4 100644
--- a/pinot-controller/src/main/java/org/apache/pinot/controller/recommender/data/DataGenerationHelpers.java
+++ b/pinot-controller/src/main/java/org/apache/pinot/controller/recommender/data/DataGenerationHelpers.java
@@ -48,24 +48,50 @@ public final class DataGenerationHelpers {
 
   private static final Logger LOGGER = 
LoggerFactory.getLogger(DataGenerationHelpers.class);
 
+  public static final int DEFAULT_FILE_INDEX = 0;
+
+  public static void generateAvro(DataGenerator generator, long totalDocs, int 
numFiles, String outDir,
+      boolean isOverrideOutDir)
+      throws Exception {
+    generateAvro(generator, totalDocs, numFiles, outDir, isOverrideOutDir, 
DEFAULT_FILE_INDEX);
+  }
+
+  public static void generateCsv(DataGenerator generator, long totalDocs, int 
numFiles, String outDir,
+      boolean isOverrideOutDir)
+      throws Exception {
+    generateCsv(generator, totalDocs, numFiles, outDir, isOverrideOutDir, 
DEFAULT_FILE_INDEX);
+  }
+
+  public static void generateJson(DataGenerator generator, long totalDocs, int 
numFiles, String outDir,
+      boolean isOverrideOutDir)
+      throws Exception {
+    generateJson(generator, totalDocs, numFiles, outDir, isOverrideOutDir, 
DEFAULT_FILE_INDEX);
+  }
+
   public static void generateAvro(DataGenerator generator, long totalDocs, int 
numFiles, String outDir,
-      boolean isOverrideOutDir) throws Exception {
+      boolean isOverrideOutDir, int fileIndex)
+      throws Exception {
     AvroWriter avroWriter = new AvroWriter();
-    avroWriter.init(new AvroWriterSpec(generator, handleOutDir(outDir, 
isOverrideOutDir), totalDocs, numFiles));
+    avroWriter.init(
+        new AvroWriterSpec(generator, handleOutDir(outDir, isOverrideOutDir), 
totalDocs, numFiles, fileIndex));
     avroWriter.write();
   }
 
   public static void generateCsv(DataGenerator generator, long totalDocs, int 
numFiles, String outDir,
-      boolean isOverrideOutDir) throws Exception {
+      boolean isOverrideOutDir, int fileIndex)
+      throws Exception {
     CsvWriter csvWriter = new CsvWriter();
-    csvWriter.init(new FileWriterSpec(generator, handleOutDir(outDir, 
isOverrideOutDir), totalDocs, numFiles));
+    csvWriter.init(
+        new FileWriterSpec(generator, handleOutDir(outDir, isOverrideOutDir), 
totalDocs, numFiles, fileIndex));
     csvWriter.write();
   }
 
   public static void generateJson(DataGenerator generator, long totalDocs, int 
numFiles, String outDir,
-      boolean isOverrideOutDir) throws Exception {
+      boolean isOverrideOutDir, int fileIndex)
+      throws Exception {
     JsonWriter jsonWriter = new JsonWriter();
-    jsonWriter.init(new FileWriterSpec(generator, handleOutDir(outDir, 
isOverrideOutDir), totalDocs, numFiles));
+    jsonWriter.init(
+        new FileWriterSpec(generator, handleOutDir(outDir, isOverrideOutDir), 
totalDocs, numFiles, fileIndex));
     jsonWriter.write();
   }
 
diff --git a/pinot-controller/src/main/java/org/apache/pinot/controller/recommender/data/writer/AvroWriter.java b/pinot-controller/src/main/java/org/apache/pinot/controller/recommender/data/writer/AvroWriter.java
index 1295b8991d..5b82bf1024 100644
--- a/pinot-controller/src/main/java/org/apache/pinot/controller/recommender/data/writer/AvroWriter.java
+++ b/pinot-controller/src/main/java/org/apache/pinot/controller/recommender/data/writer/AvroWriter.java
@@ -67,7 +67,8 @@ public class AvroWriter implements Writer {
     final int numPerFiles = (int) (_spec.getTotalDocs() / _spec.getNumFiles());
     for (int i = 0; i < _spec.getNumFiles(); i++) {
       try (AvroRecordAppender appender = new AvroRecordAppender(
-          new File(_spec.getBaseDir(), "part-" + i + ".avro"), 
getAvroSchema(_spec.getSchema()))) {
+          new File(_spec.getBaseDir(), "part-" + (i + _spec.getFileIndex()) + 
".avro"),
+          getAvroSchema(_spec.getSchema()))) {
         for (int j = 0; j < numPerFiles; j++) {
           appender.append(_spec.getGenerator().nextRow());
         }
diff --git a/pinot-controller/src/main/java/org/apache/pinot/controller/recommender/data/writer/AvroWriterSpec.java b/pinot-controller/src/main/java/org/apache/pinot/controller/recommender/data/writer/AvroWriterSpec.java
index a9ce271130..09b822c10e 100644
--- a/pinot-controller/src/main/java/org/apache/pinot/controller/recommender/data/writer/AvroWriterSpec.java
+++ b/pinot-controller/src/main/java/org/apache/pinot/controller/recommender/data/writer/AvroWriterSpec.java
@@ -30,8 +30,8 @@ public class AvroWriterSpec extends WriterSpec {
   private final int _numFiles;
   private final Schema _schema;
 
-  public AvroWriterSpec(DataGenerator generator, File baseDir, long totalDocs, 
int numFiles) {
-    super(generator);
+  public AvroWriterSpec(DataGenerator generator, File baseDir, long totalDocs, 
int numFiles, int fileIndex) {
+    super(generator, fileIndex);
     _baseDir = baseDir;
     _totalDocs = totalDocs;
     _numFiles = numFiles;
diff --git a/pinot-controller/src/main/java/org/apache/pinot/controller/recommender/data/writer/FileWriter.java b/pinot-controller/src/main/java/org/apache/pinot/controller/recommender/data/writer/FileWriter.java
index f02f245ced..b750c3a59b 100644
--- a/pinot-controller/src/main/java/org/apache/pinot/controller/recommender/data/writer/FileWriter.java
+++ b/pinot-controller/src/main/java/org/apache/pinot/controller/recommender/data/writer/FileWriter.java
@@ -41,7 +41,7 @@ public abstract class FileWriter implements Writer {
     final long docsPerFile = (long) Math.ceil((double) totalDocs / 
_spec.getNumFiles());
     final String extension = getExtension() == null ? "" : 
String.format(".%s", getExtension());
     long ingestedDocs = 0;
-    int fileIndex = 0;
+    int fileIndex = _spec.getFileIndex();
     while (ingestedDocs < totalDocs) {
       try (java.io.FileWriter writer =
           new java.io.FileWriter(new File(_spec.getBaseDir(), 
String.format("output_%d%s", fileIndex, extension)))) {
diff --git a/pinot-controller/src/main/java/org/apache/pinot/controller/recommender/data/writer/FileWriterSpec.java b/pinot-controller/src/main/java/org/apache/pinot/controller/recommender/data/writer/FileWriterSpec.java
index a7ef8cd496..01bd407ea9 100644
--- a/pinot-controller/src/main/java/org/apache/pinot/controller/recommender/data/writer/FileWriterSpec.java
+++ b/pinot-controller/src/main/java/org/apache/pinot/controller/recommender/data/writer/FileWriterSpec.java
@@ -26,8 +26,9 @@ public class FileWriterSpec extends WriterSpec {
   private final File _baseDir;
   private final long _totalDocs;
   private final int _numFiles;
-  public FileWriterSpec(DataGenerator generator, File baseDir, long totalDocs, 
int numFiles) {
-    super(generator);
+
+  public FileWriterSpec(DataGenerator generator, File baseDir, long totalDocs, 
int numFiles, int fileIndex) {
+    super(generator, fileIndex);
     _baseDir = baseDir;
     _totalDocs = totalDocs;
     _numFiles = numFiles;
diff --git a/pinot-controller/src/main/java/org/apache/pinot/controller/recommender/data/writer/WriterSpec.java b/pinot-controller/src/main/java/org/apache/pinot/controller/recommender/data/writer/WriterSpec.java
index 0a82601fa7..3393227e11 100644
--- a/pinot-controller/src/main/java/org/apache/pinot/controller/recommender/data/writer/WriterSpec.java
+++ b/pinot-controller/src/main/java/org/apache/pinot/controller/recommender/data/writer/WriterSpec.java
@@ -23,12 +23,18 @@ import org.apache.pinot.controller.recommender.data.generator.DataGenerator;
 
 public class WriterSpec {
   private final DataGenerator _generator;
+  private final int _fileIndex;
 
-  public WriterSpec(DataGenerator generator) {
+  public WriterSpec(DataGenerator generator, int fileIndex) {
     _generator = generator;
+    _fileIndex = fileIndex;
   }
 
   public DataGenerator getGenerator() {
     return _generator;
   }
+
+  public int getFileIndex() {
+    return _fileIndex;
+  }
 }
diff --git a/pinot-tools/src/main/java/org/apache/pinot/tools/admin/command/GenerateDataCommand.java b/pinot-tools/src/main/java/org/apache/pinot/tools/admin/command/GenerateDataCommand.java
index efcc130188..a7f556509f 100644
--- a/pinot-tools/src/main/java/org/apache/pinot/tools/admin/command/GenerateDataCommand.java
+++ b/pinot-tools/src/main/java/org/apache/pinot/tools/admin/command/GenerateDataCommand.java
@@ -73,6 +73,9 @@ public class GenerateDataCommand extends AbstractBaseAdminCommand implements Com
       description = "Output format ('AVRO' or 'CSV' or 'JSON').")
   private String _format = FORMAT_AVRO;
 
+  @CommandLine.Option(names = {"-fileIndex"}, help = true, description = 
"Starting file index")
+  private int _fileIndex = DataGenerationHelpers.DEFAULT_FILE_INDEX;
+
   public void init(int numRecords, int numFiles, String schemaFile, String 
outDir) {
     _numRecords = numRecords;
     _numFiles = numFiles;
@@ -120,11 +123,11 @@ public class GenerateDataCommand extends AbstractBaseAdminCommand implements Com
     gen.init(spec);
 
     if (FORMAT_AVRO.equalsIgnoreCase(_format)) {
-      DataGenerationHelpers.generateAvro(gen, _numRecords, _numFiles, _outDir, 
_overwrite);
+      DataGenerationHelpers.generateAvro(gen, _numRecords, _numFiles, _outDir, 
_overwrite, _fileIndex);
     } else if (FORMAT_CSV.equalsIgnoreCase(_format)) {
-      DataGenerationHelpers.generateCsv(gen, _numRecords, _numFiles, _outDir, 
_overwrite);
+      DataGenerationHelpers.generateCsv(gen, _numRecords, _numFiles, _outDir, 
_overwrite, _fileIndex);
     } else if (FORMAT_JSON.equalsIgnoreCase(_format)) {
-      DataGenerationHelpers.generateJson(gen, _numRecords, _numFiles, _outDir, 
_overwrite);
+      DataGenerationHelpers.generateJson(gen, _numRecords, _numFiles, _outDir, 
_overwrite, _fileIndex);
     } else {
       throw new IllegalArgumentException(String.format("Invalid output format 
'%s'", _format));
     }


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@pinot.apache.org
For additional commands, e-mail: commits-h...@pinot.apache.org

Reply via email to