This is an automated email from the ASF dual-hosted git repository.

xiangfu pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/incubator-pinot.git


The following commit(s) were added to refs/heads/master by this push:
     new 50ac78c  URL encoded generated segment tar name (#6571)
50ac78c is described below

commit 50ac78ced4b332141888352d564b235cd9b01e42
Author: Xiang Fu <fx19880...@gmail.com>
AuthorDate: Tue Feb 16 01:02:16 2021 -0800

    URL encoded generated segment tar name (#6571)
---
 .../generation/SegmentGenerationUtilsTest.java     | 55 +++++++++++++++-------
 .../batch/hadoop/HadoopSegmentCreationMapper.java  |  3 +-
 .../spark/SparkSegmentGenerationJobRunner.java     |  3 +-
 .../standalone/SegmentGenerationJobRunner.java     |  3 +-
 4 files changed, 45 insertions(+), 19 deletions(-)

diff --git 
a/pinot-common/src/test/java/org/apache/pinot/common/segment/generation/SegmentGenerationUtilsTest.java
 
b/pinot-common/src/test/java/org/apache/pinot/common/segment/generation/SegmentGenerationUtilsTest.java
index b884b2d..841ad91 100644
--- 
a/pinot-common/src/test/java/org/apache/pinot/common/segment/generation/SegmentGenerationUtilsTest.java
+++ 
b/pinot-common/src/test/java/org/apache/pinot/common/segment/generation/SegmentGenerationUtilsTest.java
@@ -19,8 +19,10 @@
 
 package org.apache.pinot.common.segment.generation;
 
+import java.io.UnsupportedEncodingException;
 import java.net.URI;
 import java.net.URISyntaxException;
+import java.net.URLEncoder;
 import org.testng.Assert;
 import org.testng.annotations.Test;
 
@@ -39,31 +41,53 @@ public class SegmentGenerationUtilsTest {
     
Assert.assertEquals(SegmentGenerationUtils.getFileName(URI.create("hdfs://var/data/myTable/2020/04/06/input.data")),
         "input.data");
   }
-  
+
   // Confirm output path generation works with URIs that have 
authority/userInfo.
-  
   @Test
-  public void testRelativeURIs() throws URISyntaxException {
+  public void testRelativeURIs()
+      throws URISyntaxException {
     URI inputDirURI = new URI("hdfs://namenode1:9999/path/to/");
     URI inputFileURI = new URI("hdfs://namenode1:9999/path/to/subdir/file");
     URI outputDirURI = new URI("hdfs://namenode2/output/dir/");
     URI segmentTarFileName = new URI("file.tar.gz");
-    URI outputSegmentTarURI = SegmentGenerationUtils
-        .getRelativeOutputPath(inputDirURI, inputFileURI, 
outputDirURI).resolve(segmentTarFileName);
+    URI outputSegmentTarURI = 
SegmentGenerationUtils.getRelativeOutputPath(inputDirURI, inputFileURI, 
outputDirURI)
+        .resolve(segmentTarFileName);
+    Assert.assertEquals(outputSegmentTarURI.toString(), 
"hdfs://namenode2/output/dir/subdir/file.tar.gz");
+  }
+
+  // Invalid segment tar name with space
+  @Test
+  public void testInvalidRelativeURIs()
+      throws URISyntaxException, UnsupportedEncodingException {
+    URI inputDirURI = new URI("hdfs://namenode1:9999/path/to/");
+    URI inputFileURI = new URI("hdfs://namenode1:9999/path/to/subdir/file");
+    URI outputDirURI = new URI("hdfs://namenode2/output/dir/");
+    try {
+      SegmentGenerationUtils.getRelativeOutputPath(inputDirURI, inputFileURI, 
outputDirURI)
+          .resolve(new 
URI("table_OFFLINE_2021-02-01_09:39:00.000_2021-02-01_11:59:00.000_2.tar.gz"));
+      Assert.fail("Expected an error thrown for uri resolve with space in 
segment name");
+    } catch (Exception e) {
+      Assert.assertTrue(e instanceof URISyntaxException);
+    }
+    URI outputSegmentTarURI = 
SegmentGenerationUtils.getRelativeOutputPath(inputDirURI, inputFileURI, 
outputDirURI)
+        .resolve(new URI(
+            
URLEncoder.encode("table_OFFLINE_2021-02-01_09:39:00.000_2021-02-01_11:59:00.000_2.tar.gz",
 "UTF-8")));
     Assert.assertEquals(outputSegmentTarURI.toString(),
-        "hdfs://namenode2/output/dir/subdir/file.tar.gz");
+        
"hdfs://namenode2/output/dir/subdir/table_OFFLINE_2021-02-01_09%3A39%3A00.000_2021-02-01_11%3A59%3A00.000_2.tar.gz");
   }
-  
+
   // Don't lose authority portion of inputDirURI when creating output files
   // https://github.com/apache/incubator-pinot/issues/6355
 
   @Test
-  public void testGetFileURI() throws Exception {
+  public void testGetFileURI()
+      throws Exception {
     // Raw path without scheme
     Assert.assertEquals(SegmentGenerationUtils.getFileURI("/path/to/file", new 
URI("file:/path/to")).toString(),
         "file:/path/to/file");
-    Assert.assertEquals(SegmentGenerationUtils.getFileURI("/path/to/file", new 
URI("hdfs://namenode/path/to")).toString(),
-        "hdfs://namenode/path/to/file");
+    Assert
+        .assertEquals(SegmentGenerationUtils.getFileURI("/path/to/file", new 
URI("hdfs://namenode/path/to")).toString(),
+            "hdfs://namenode/path/to/file");
     Assert.assertEquals(SegmentGenerationUtils.getFileURI("/path/to/file", new 
URI("hdfs:///path/to")).toString(),
         "hdfs:/path/to/file");
 
@@ -85,19 +109,18 @@ public class SegmentGenerationUtilsTest {
 
     // S3 URI with userInfo (username/password)
     validateFileURI(new URI("s3://username:password@bucket/path/to/"));
-    
   }
 
-  private void validateFileURI(URI directoryURI, String expectedPrefix) throws 
URISyntaxException {
+  private void validateFileURI(URI directoryURI, String expectedPrefix)
+      throws URISyntaxException {
     URI fileURI = new URI(directoryURI.toString() + "file");
     String rawPath = fileURI.getRawPath();
 
-    Assert.assertEquals(SegmentGenerationUtils.getFileURI(rawPath, 
fileURI).toString(),
-        expectedPrefix + "file");
+    Assert.assertEquals(SegmentGenerationUtils.getFileURI(rawPath, 
fileURI).toString(), expectedPrefix + "file");
   }
 
-  private void validateFileURI(URI directoryURI) throws URISyntaxException {
+  private void validateFileURI(URI directoryURI)
+      throws URISyntaxException {
     validateFileURI(directoryURI, directoryURI.toString());
   }
-
 }
diff --git 
a/pinot-plugins/pinot-batch-ingestion/pinot-batch-ingestion-hadoop/src/main/java/org/apache/pinot/plugin/ingestion/batch/hadoop/HadoopSegmentCreationMapper.java
 
b/pinot-plugins/pinot-batch-ingestion/pinot-batch-ingestion-hadoop/src/main/java/org/apache/pinot/plugin/ingestion/batch/hadoop/HadoopSegmentCreationMapper.java
index 2dc90af..8b71584 100644
--- 
a/pinot-plugins/pinot-batch-ingestion/pinot-batch-ingestion-hadoop/src/main/java/org/apache/pinot/plugin/ingestion/batch/hadoop/HadoopSegmentCreationMapper.java
+++ 
b/pinot-plugins/pinot-batch-ingestion/pinot-batch-ingestion-hadoop/src/main/java/org/apache/pinot/plugin/ingestion/batch/hadoop/HadoopSegmentCreationMapper.java
@@ -22,6 +22,7 @@ import com.google.common.base.Preconditions;
 import java.io.File;
 import java.io.IOException;
 import java.net.URI;
+import java.net.URLEncoder;
 import java.nio.file.Files;
 import java.util.List;
 import java.util.UUID;
@@ -172,7 +173,7 @@ public class HadoopSegmentCreationMapper extends 
Mapper<LongWritable, Text, Long
 
       // Tar segment directory to compress file
       File localSegmentDir = new File(localOutputTempDir, segmentName);
-      String segmentTarFileName = segmentName + Constants.TAR_GZ_FILE_EXT;
+      String segmentTarFileName = URLEncoder.encode(segmentName + 
Constants.TAR_GZ_FILE_EXT, "UTF-8");
       File localSegmentTarFile = new File(localOutputTempDir, 
segmentTarFileName);
       LOGGER.info("Tarring segment from: {} to: {}", localSegmentDir, 
localSegmentTarFile);
       TarGzCompressionUtils.createTarGzFile(localSegmentDir, 
localSegmentTarFile);
diff --git 
a/pinot-plugins/pinot-batch-ingestion/pinot-batch-ingestion-spark/src/main/java/org/apache/pinot/plugin/ingestion/batch/spark/SparkSegmentGenerationJobRunner.java
 
b/pinot-plugins/pinot-batch-ingestion/pinot-batch-ingestion-spark/src/main/java/org/apache/pinot/plugin/ingestion/batch/spark/SparkSegmentGenerationJobRunner.java
index ac8331e..a315d15 100644
--- 
a/pinot-plugins/pinot-batch-ingestion/pinot-batch-ingestion-spark/src/main/java/org/apache/pinot/plugin/ingestion/batch/spark/SparkSegmentGenerationJobRunner.java
+++ 
b/pinot-plugins/pinot-batch-ingestion/pinot-batch-ingestion-spark/src/main/java/org/apache/pinot/plugin/ingestion/batch/spark/SparkSegmentGenerationJobRunner.java
@@ -29,6 +29,7 @@ import java.io.File;
 import java.io.IOException;
 import java.io.Serializable;
 import java.net.URI;
+import java.net.URLEncoder;
 import java.nio.file.FileSystems;
 import java.nio.file.Path;
 import java.nio.file.PathMatcher;
@@ -312,7 +313,7 @@ public class SparkSegmentGenerationJobRunner implements 
IngestionJobRunner, Seri
 
           // Tar segment directory to compress file
           File localSegmentDir = new File(localOutputTempDir, segmentName);
-          String segmentTarFileName = segmentName + Constants.TAR_GZ_FILE_EXT;
+          String segmentTarFileName = URLEncoder.encode(segmentName + 
Constants.TAR_GZ_FILE_EXT, "UTF-8");
           File localSegmentTarFile = new File(localOutputTempDir, 
segmentTarFileName);
           LOGGER.info("Tarring segment from: {} to: {}", localSegmentDir, 
localSegmentTarFile);
           TarGzCompressionUtils.createTarGzFile(localSegmentDir, 
localSegmentTarFile);
diff --git 
a/pinot-plugins/pinot-batch-ingestion/pinot-batch-ingestion-standalone/src/main/java/org/apache/pinot/plugin/ingestion/batch/standalone/SegmentGenerationJobRunner.java
 
b/pinot-plugins/pinot-batch-ingestion/pinot-batch-ingestion-standalone/src/main/java/org/apache/pinot/plugin/ingestion/batch/standalone/SegmentGenerationJobRunner.java
index 67c4471..efe1679 100644
--- 
a/pinot-plugins/pinot-batch-ingestion/pinot-batch-ingestion-standalone/src/main/java/org/apache/pinot/plugin/ingestion/batch/standalone/SegmentGenerationJobRunner.java
+++ 
b/pinot-plugins/pinot-batch-ingestion/pinot-batch-ingestion-standalone/src/main/java/org/apache/pinot/plugin/ingestion/batch/standalone/SegmentGenerationJobRunner.java
@@ -20,6 +20,7 @@ package org.apache.pinot.plugin.ingestion.batch.standalone;
 
 import java.io.File;
 import java.net.URI;
+import java.net.URLEncoder;
 import java.nio.file.FileSystems;
 import java.nio.file.PathMatcher;
 import java.nio.file.Paths;
@@ -198,7 +199,7 @@ public class SegmentGenerationJobRunner implements 
IngestionJobRunner {
             String segmentName = taskRunner.run();
             // Tar segment directory to compress file
             localSegmentDir = new File(localOutputTempDir, segmentName);
-            String segmentTarFileName = segmentName + 
Constants.TAR_GZ_FILE_EXT;
+            String segmentTarFileName = URLEncoder.encode(segmentName + 
Constants.TAR_GZ_FILE_EXT, "UTF-8");
             localSegmentTarFile = new File(localOutputTempDir, 
segmentTarFileName);
             LOGGER.info("Tarring segment from: {} to: {}", localSegmentDir, 
localSegmentTarFile);
             TarGzCompressionUtils.createTarGzFile(localSegmentDir, 
localSegmentTarFile);


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@pinot.apache.org
For additional commands, e-mail: commits-h...@pinot.apache.org

Reply via email to