This is an automated email from the ASF dual-hosted git repository. xiangfu pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/incubator-pinot.git
The following commit(s) were added to refs/heads/master by this push: new 50ac78c URL encoded generated segment tar name (#6571) 50ac78c is described below commit 50ac78ced4b332141888352d564b235cd9b01e42 Author: Xiang Fu <fx19880...@gmail.com> AuthorDate: Tue Feb 16 01:02:16 2021 -0800 URL encoded generated segment tar name (#6571) --- .../generation/SegmentGenerationUtilsTest.java | 55 +++++++++++++++------- .../batch/hadoop/HadoopSegmentCreationMapper.java | 3 +- .../spark/SparkSegmentGenerationJobRunner.java | 3 +- .../standalone/SegmentGenerationJobRunner.java | 3 +- 4 files changed, 45 insertions(+), 19 deletions(-) diff --git a/pinot-common/src/test/java/org/apache/pinot/common/segment/generation/SegmentGenerationUtilsTest.java b/pinot-common/src/test/java/org/apache/pinot/common/segment/generation/SegmentGenerationUtilsTest.java index b884b2d..841ad91 100644 --- a/pinot-common/src/test/java/org/apache/pinot/common/segment/generation/SegmentGenerationUtilsTest.java +++ b/pinot-common/src/test/java/org/apache/pinot/common/segment/generation/SegmentGenerationUtilsTest.java @@ -19,8 +19,10 @@ package org.apache.pinot.common.segment.generation; +import java.io.UnsupportedEncodingException; import java.net.URI; import java.net.URISyntaxException; +import java.net.URLEncoder; import org.testng.Assert; import org.testng.annotations.Test; @@ -39,31 +41,53 @@ public class SegmentGenerationUtilsTest { Assert.assertEquals(SegmentGenerationUtils.getFileName(URI.create("hdfs://var/data/myTable/2020/04/06/input.data")), "input.data"); } - + // Confirm output path generation works with URIs that have authority/userInfo. - @Test - public void testRelativeURIs() throws URISyntaxException { + public void testRelativeURIs() + throws URISyntaxException { URI inputDirURI = new URI("hdfs://namenode1:9999/path/to/"); URI inputFileURI = new URI("hdfs://namenode1:9999/path/to/subdir/file"); URI outputDirURI = new URI("hdfs://namenode2/output/dir/"); URI segmentTarFileName = new URI("file.tar.gz"); - URI outputSegmentTarURI = SegmentGenerationUtils - .getRelativeOutputPath(inputDirURI, inputFileURI, outputDirURI).resolve(segmentTarFileName); + URI outputSegmentTarURI = SegmentGenerationUtils.getRelativeOutputPath(inputDirURI, inputFileURI, outputDirURI) + .resolve(segmentTarFileName); + Assert.assertEquals(outputSegmentTarURI.toString(), "hdfs://namenode2/output/dir/subdir/file.tar.gz"); + } + + // Invalid segment tar name with space + @Test + public void testInvalidRelativeURIs() + throws URISyntaxException, UnsupportedEncodingException { + URI inputDirURI = new URI("hdfs://namenode1:9999/path/to/"); + URI inputFileURI = new URI("hdfs://namenode1:9999/path/to/subdir/file"); + URI outputDirURI = new URI("hdfs://namenode2/output/dir/"); + try { + SegmentGenerationUtils.getRelativeOutputPath(inputDirURI, inputFileURI, outputDirURI) + .resolve(new URI("table_OFFLINE_2021-02-01_09:39:00.000_2021-02-01_11:59:00.000_2.tar.gz")); + Assert.fail("Expected an error thrown for uri resolve with space in segment name"); + } catch (Exception e) { + Assert.assertTrue(e instanceof URISyntaxException); + } + URI outputSegmentTarURI = SegmentGenerationUtils.getRelativeOutputPath(inputDirURI, inputFileURI, outputDirURI) + .resolve(new URI( + URLEncoder.encode("table_OFFLINE_2021-02-01_09:39:00.000_2021-02-01_11:59:00.000_2.tar.gz", "UTF-8"))); Assert.assertEquals(outputSegmentTarURI.toString(), - "hdfs://namenode2/output/dir/subdir/file.tar.gz"); + "hdfs://namenode2/output/dir/subdir/table_OFFLINE_2021-02-01_09%3A39%3A00.000_2021-02-01_11%3A59%3A00.000_2.tar.gz"); } - + // Don't lose authority portion of inputDirURI when creating output files // https://github.com/apache/incubator-pinot/issues/6355 @Test - public void testGetFileURI() throws Exception { + public void testGetFileURI() + throws Exception { // Raw path without scheme Assert.assertEquals(SegmentGenerationUtils.getFileURI("/path/to/file", new URI("file:/path/to")).toString(), "file:/path/to/file"); - Assert.assertEquals(SegmentGenerationUtils.getFileURI("/path/to/file", new URI("hdfs://namenode/path/to")).toString(), - "hdfs://namenode/path/to/file"); + Assert + .assertEquals(SegmentGenerationUtils.getFileURI("/path/to/file", new URI("hdfs://namenode/path/to")).toString(), + "hdfs://namenode/path/to/file"); Assert.assertEquals(SegmentGenerationUtils.getFileURI("/path/to/file", new URI("hdfs:///path/to")).toString(), "hdfs:/path/to/file"); @@ -85,19 +109,18 @@ public class SegmentGenerationUtilsTest { // S3 URI with userInfo (username/password) validateFileURI(new URI("s3://username:password@bucket/path/to/")); - } - private void validateFileURI(URI directoryURI, String expectedPrefix) throws URISyntaxException { + private void validateFileURI(URI directoryURI, String expectedPrefix) + throws URISyntaxException { URI fileURI = new URI(directoryURI.toString() + "file"); String rawPath = fileURI.getRawPath(); - Assert.assertEquals(SegmentGenerationUtils.getFileURI(rawPath, fileURI).toString(), - expectedPrefix + "file"); + Assert.assertEquals(SegmentGenerationUtils.getFileURI(rawPath, fileURI).toString(), expectedPrefix + "file"); } - private void validateFileURI(URI directoryURI) throws URISyntaxException { + private void validateFileURI(URI directoryURI) + throws URISyntaxException { validateFileURI(directoryURI, directoryURI.toString()); } - } diff --git a/pinot-plugins/pinot-batch-ingestion/pinot-batch-ingestion-hadoop/src/main/java/org/apache/pinot/plugin/ingestion/batch/hadoop/HadoopSegmentCreationMapper.java b/pinot-plugins/pinot-batch-ingestion/pinot-batch-ingestion-hadoop/src/main/java/org/apache/pinot/plugin/ingestion/batch/hadoop/HadoopSegmentCreationMapper.java index 2dc90af..8b71584 100644 --- a/pinot-plugins/pinot-batch-ingestion/pinot-batch-ingestion-hadoop/src/main/java/org/apache/pinot/plugin/ingestion/batch/hadoop/HadoopSegmentCreationMapper.java +++ b/pinot-plugins/pinot-batch-ingestion/pinot-batch-ingestion-hadoop/src/main/java/org/apache/pinot/plugin/ingestion/batch/hadoop/HadoopSegmentCreationMapper.java @@ -22,6 +22,7 @@ import com.google.common.base.Preconditions; import java.io.File; import java.io.IOException; import java.net.URI; +import java.net.URLEncoder; import java.nio.file.Files; import java.util.List; import java.util.UUID; @@ -172,7 +173,7 @@ public class HadoopSegmentCreationMapper extends Mapper<LongWritable, Text, Long // Tar segment directory to compress file File localSegmentDir = new File(localOutputTempDir, segmentName); - String segmentTarFileName = segmentName + Constants.TAR_GZ_FILE_EXT; + String segmentTarFileName = URLEncoder.encode(segmentName + Constants.TAR_GZ_FILE_EXT, "UTF-8"); File localSegmentTarFile = new File(localOutputTempDir, segmentTarFileName); LOGGER.info("Tarring segment from: {} to: {}", localSegmentDir, localSegmentTarFile); TarGzCompressionUtils.createTarGzFile(localSegmentDir, localSegmentTarFile); diff --git a/pinot-plugins/pinot-batch-ingestion/pinot-batch-ingestion-spark/src/main/java/org/apache/pinot/plugin/ingestion/batch/spark/SparkSegmentGenerationJobRunner.java b/pinot-plugins/pinot-batch-ingestion/pinot-batch-ingestion-spark/src/main/java/org/apache/pinot/plugin/ingestion/batch/spark/SparkSegmentGenerationJobRunner.java index ac8331e..a315d15 100644 --- a/pinot-plugins/pinot-batch-ingestion/pinot-batch-ingestion-spark/src/main/java/org/apache/pinot/plugin/ingestion/batch/spark/SparkSegmentGenerationJobRunner.java +++ b/pinot-plugins/pinot-batch-ingestion/pinot-batch-ingestion-spark/src/main/java/org/apache/pinot/plugin/ingestion/batch/spark/SparkSegmentGenerationJobRunner.java @@ -29,6 +29,7 @@ import java.io.File; import java.io.IOException; import java.io.Serializable; import java.net.URI; +import java.net.URLEncoder; import java.nio.file.FileSystems; import java.nio.file.Path; import java.nio.file.PathMatcher; @@ -312,7 +313,7 @@ public class SparkSegmentGenerationJobRunner implements IngestionJobRunner, Seri // Tar segment directory to compress file File localSegmentDir = new File(localOutputTempDir, segmentName); - String segmentTarFileName = segmentName + Constants.TAR_GZ_FILE_EXT; + String segmentTarFileName = URLEncoder.encode(segmentName + Constants.TAR_GZ_FILE_EXT, "UTF-8"); File localSegmentTarFile = new File(localOutputTempDir, segmentTarFileName); LOGGER.info("Tarring segment from: {} to: {}", localSegmentDir, localSegmentTarFile); TarGzCompressionUtils.createTarGzFile(localSegmentDir, localSegmentTarFile); diff --git a/pinot-plugins/pinot-batch-ingestion/pinot-batch-ingestion-standalone/src/main/java/org/apache/pinot/plugin/ingestion/batch/standalone/SegmentGenerationJobRunner.java b/pinot-plugins/pinot-batch-ingestion/pinot-batch-ingestion-standalone/src/main/java/org/apache/pinot/plugin/ingestion/batch/standalone/SegmentGenerationJobRunner.java index 67c4471..efe1679 100644 --- a/pinot-plugins/pinot-batch-ingestion/pinot-batch-ingestion-standalone/src/main/java/org/apache/pinot/plugin/ingestion/batch/standalone/SegmentGenerationJobRunner.java +++ b/pinot-plugins/pinot-batch-ingestion/pinot-batch-ingestion-standalone/src/main/java/org/apache/pinot/plugin/ingestion/batch/standalone/SegmentGenerationJobRunner.java @@ -20,6 +20,7 @@ package org.apache.pinot.plugin.ingestion.batch.standalone; import java.io.File; import java.net.URI; +import java.net.URLEncoder; import java.nio.file.FileSystems; import java.nio.file.PathMatcher; import java.nio.file.Paths; @@ -198,7 +199,7 @@ public class SegmentGenerationJobRunner implements IngestionJobRunner { String segmentName = taskRunner.run(); // Tar segment directory to compress file localSegmentDir = new File(localOutputTempDir, segmentName); - String segmentTarFileName = segmentName + Constants.TAR_GZ_FILE_EXT; + String segmentTarFileName = URLEncoder.encode(segmentName + Constants.TAR_GZ_FILE_EXT, "UTF-8"); localSegmentTarFile = new File(localOutputTempDir, segmentTarFileName); LOGGER.info("Tarring segment from: {} to: {}", localSegmentDir, localSegmentTarFile); TarGzCompressionUtils.createTarGzFile(localSegmentDir, localSegmentTarFile); --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@pinot.apache.org For additional commands, e-mail: commits-h...@pinot.apache.org