This is an automated email from the ASF dual-hosted git repository.

tallison pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/tika.git


The following commit(s) were added to refs/heads/main by this push:
     new 6e224a1fd Simplify path naming
6e224a1fd is described below

commit 6e224a1fd33196e908b56f69db9e978f096fbc54
Author: tallison <[email protected]>
AuthorDate: Mon Aug 4 11:40:29 2025 -0400

    Simplify path naming
---
 .../src/main/java/org/apache/tika/cli/TikaCLI.java | 34 ++++++++++------------
 .../test/java/org/apache/tika/cli/TikaCLITest.java |  2 +-
 .../tika/pipes/emitter/fs/FileSystemEmitter.java   |  9 ++++--
 3 files changed, 23 insertions(+), 22 deletions(-)

diff --git a/tika-app/src/main/java/org/apache/tika/cli/TikaCLI.java b/tika-app/src/main/java/org/apache/tika/cli/TikaCLI.java
index ad859c561..aefc03660 100644
--- a/tika-app/src/main/java/org/apache/tika/cli/TikaCLI.java
+++ b/tika-app/src/main/java/org/apache/tika/cli/TikaCLI.java
@@ -21,7 +21,6 @@ import static java.nio.charset.StandardCharsets.UTF_8;
 import java.io.BufferedReader;
 import java.io.File;
 import java.io.FileInputStream;
-import java.io.FileOutputStream;
 import java.io.IOException;
 import java.io.InputStream;
 import java.io.InputStreamReader;
@@ -123,7 +122,7 @@ public class TikaCLI {
             return new DefaultHandler();
         }
     };
-    private File extractDir = new File(".");
+    private Path extractDir = Paths.get(".");
     private ParseContext context;
     private Detector detector;
     private Parser parser;
@@ -449,7 +448,7 @@ public class TikaCLI {
             if (dirPath.isEmpty()) {
                 dirPath = ".";
             }
-            extractDir = new File(dirPath);
+            extractDir = Paths.get(dirPath);
         } else if (arg.equals("-z") || arg.equals("--extract")) {
             type = NO_OUTPUT;
             context.set(EmbeddedDocumentExtractor.class, new FileEmbeddedDocumentExtractor());
@@ -1095,22 +1094,20 @@ public class TikaCLI {
             MediaType contentType = detector.detect(tis, metadata);
 
             String name = metadata.get(TikaCoreProperties.RESOURCE_NAME_KEY);
-            File outputFile = null;
+            Path outputFile = null;
             if (name == null) {
-                name = "file" + count++;
+                name = "file_" + count++;
             }
             outputFile = getOutputFile(name, metadata, contentType);
 
 
-            File parent = outputFile.getParentFile();
-            if (!parent.exists()) {
-                if (!parent.mkdirs()) {
-                    throw new IOException("unable to create directory \"" + parent + "\"");
-                }
+            Path parent = outputFile.getParent();
+            if (parent != null && ! Files.isDirectory(parent)) {
+                Files.createDirectories(parent);
             }
             System.out.println("Extracting '" + name + "' (" + contentType + ") to " + outputFile);
 
-            try (FileOutputStream os = new FileOutputStream(outputFile)) {
+            try (OutputStream os = Files.newOutputStream(outputFile)) {
                 if (embeddedStreamTranslator.shouldTranslate(tis, metadata)) {
                     try (InputStream translated = embeddedStreamTranslator.translate(tis, metadata)) {
                         IOUtils.copy(translated, os);
@@ -1127,7 +1124,7 @@ public class TikaCLI {
             }
         }
 
-        private File getOutputFile(String name, Metadata metadata, MediaType contentType) {
+        private Path getOutputFile(String name, Metadata metadata, MediaType contentType) throws IOException {
             String ext = getExtension(contentType);
             if (name.indexOf('.') == -1 && contentType != null) {
                 name += ext;
@@ -1154,13 +1151,14 @@ public class TikaCLI {
             if (prefixLength > -1) {
                 normalizedName = normalizedName.substring(prefixLength);
             }
-            File outputFile = new File(extractDir, normalizedName);
+            Path outputFile = extractDir.resolve(normalizedName);
             //if file already exists, prepend uuid
-            if (outputFile.exists()) {
+            if (Files.exists(outputFile)) {
                 String fileName = FilenameUtils.getName(normalizedName);
-                outputFile = new File(extractDir, UUID
-                        .randomUUID()
-                        .toString() + "-" + fileName);
+                outputFile = extractDir.resolve( UUID.randomUUID() + "-" + fileName);
+            }
+            if (! outputFile.toAbsolutePath().normalize().startsWith(extractDir.toAbsolutePath().normalize())) {
+                throw new IOException("Path traversal?!: " + outputFile.toAbsolutePath());
             }
             return outputFile;
         }
@@ -1177,7 +1175,7 @@ public class TikaCLI {
                     return ext;
                 }
             } catch (MimeTypeException e) {
-                e.printStackTrace();
+                LOG.info("bad mime type?", e);
             }
             return ".bin";
 
diff --git a/tika-app/src/test/java/org/apache/tika/cli/TikaCLITest.java b/tika-app/src/test/java/org/apache/tika/cli/TikaCLITest.java
index 9b20cb141..13bf4153d 100644
--- a/tika-app/src/test/java/org/apache/tika/cli/TikaCLITest.java
+++ b/tika-app/src/test/java/org/apache/tika/cli/TikaCLITest.java
@@ -295,7 +295,7 @@ public class TikaCLITest {
 
     @Test
     public void testExtractSimple() throws Exception {
-        String[] expectedChildren = new String[]{"MBD002B040A.cdx", "file4.png", "MBD002B0FA6.bin", "MBD00262FE3.txt", "file0.emf"};
+        String[] expectedChildren = new String[]{"MBD002B040A.cdx", "file_4.png", "MBD002B0FA6.bin", "MBD00262FE3.txt", "file_0.emf"};
         testExtract("/coffee.xls", expectedChildren, 8);
     }
 
diff --git a/tika-pipes/tika-emitters/tika-emitter-fs/src/main/java/org/apache/tika/pipes/emitter/fs/FileSystemEmitter.java b/tika-pipes/tika-emitters/tika-emitter-fs/src/main/java/org/apache/tika/pipes/emitter/fs/FileSystemEmitter.java
index 9142c9b7b..0da31981c 100644
--- a/tika-pipes/tika-emitters/tika-emitter-fs/src/main/java/org/apache/tika/pipes/emitter/fs/FileSystemEmitter.java
+++ b/tika-pipes/tika-emitters/tika-emitter-fs/src/main/java/org/apache/tika/pipes/emitter/fs/FileSystemEmitter.java
@@ -76,20 +76,23 @@ public class FileSystemEmitter extends AbstractEmitter implements StreamEmitter
     @Override
     public void emit(String emitKey, List<Metadata> metadataList, ParseContext parseContext) throws IOException, TikaEmitterException {
         Path output;
-        if (metadataList == null || metadataList.size() == 0) {
+        if (metadataList == null || metadataList.isEmpty()) {
             throw new TikaEmitterException("metadata list must not be null or of size 0");
         }
 
-        if (fileExtension != null && fileExtension.length() > 0) {
+        if (fileExtension != null && ! fileExtension.isEmpty()) {
             emitKey += "." + fileExtension;
         }
         if (basePath != null) {
             output = basePath.resolve(emitKey);
+            if (!output.toAbsolutePath().normalize().startsWith(basePath.toAbsolutePath().normalize())) {
+                throw new TikaEmitterException("path traversal?! " + output.toAbsolutePath());
+            }
         } else {
             output = Paths.get(emitKey);
         }
 
-        if (!Files.isDirectory(output.getParent())) {
+        if (output.getParent() != null && !Files.isDirectory(output.getParent())) {
             Files.createDirectories(output.getParent());
         }
         try (Writer writer = Files.newBufferedWriter(output, StandardCharsets.UTF_8)) {

Reply via email to