This is an automated email from the ASF dual-hosted git repository.

tallison pushed a commit to branch branch_3x
in repository https://gitbox.apache.org/repos/asf/tika.git


The following commit(s) were added to refs/heads/branch_3x by this push:
     new 0c89f4b89 Simplify path naming
0c89f4b89 is described below

commit 0c89f4b894395af57d441e0cbebdf2cb6657416d
Author: tallison <[email protected]>
AuthorDate: Mon Aug 4 11:40:29 2025 -0400

    Simplify path naming
    
    (cherry picked from commit 6e224a1fd33196e908b56f69db9e978f096fbc54)
    
    # Conflicts:
    #       tika-app/src/main/java/org/apache/tika/cli/TikaCLI.java
---
 .../src/main/java/org/apache/tika/cli/TikaCLI.java | 36 ++++++++++------------
 .../test/java/org/apache/tika/cli/TikaCLITest.java |  2 +-
 .../tika/pipes/emitter/fs/FileSystemEmitter.java   |  9 ++++--
 3 files changed, 24 insertions(+), 23 deletions(-)

diff --git a/tika-app/src/main/java/org/apache/tika/cli/TikaCLI.java b/tika-app/src/main/java/org/apache/tika/cli/TikaCLI.java
index 41eccad3c..3c1af1c65 100644
--- a/tika-app/src/main/java/org/apache/tika/cli/TikaCLI.java
+++ b/tika-app/src/main/java/org/apache/tika/cli/TikaCLI.java
@@ -21,7 +21,6 @@ import static java.nio.charset.StandardCharsets.UTF_8;
 import java.io.BufferedReader;
 import java.io.File;
 import java.io.FileInputStream;
-import java.io.FileOutputStream;
 import java.io.IOException;
 import java.io.InputStream;
 import java.io.InputStreamReader;
@@ -124,7 +123,7 @@ public class TikaCLI {
             return new DefaultHandler();
         }
     };
-    private File extractDir = new File(".");
+    private Path extractDir = Paths.get(".");
     private ParseContext context;
     private Detector detector;
     private Parser parser;
@@ -339,7 +338,7 @@ public class TikaCLI {
             pdfParserConfig.setExtractIncrementalUpdateInfo(true);
             pdfParserConfig.setParseIncrementalUpdates(true);
             String warn = "As a convenience, TikaCLI has turned on extraction 
of\n" +
-                    "inline images and incremental updates for the PDFParser 
(TIKA-2374, " +
+                    "inline images and parsing of incremental updates for the 
PDFParser (TIKA-2374, " +
                     "TIKA-4017 and TIKA-4354).\n" +
                     "This is not the default behavior in Tika generally or in 
tika-server.";
             LOG.info(warn);
@@ -441,7 +440,7 @@ public class TikaCLI {
             if (dirPath.isEmpty()) {
                 dirPath = ".";
             }
-            extractDir = new File(dirPath);
+            extractDir = Paths.get(dirPath);
         } else if (arg.equals("-z") || arg.equals("--extract")) {
             type = NO_OUTPUT;
             context.set(EmbeddedDocumentExtractor.class, new 
FileEmbeddedDocumentExtractor());
@@ -1089,22 +1088,20 @@ public class TikaCLI {
             MediaType contentType = detector.detect(inputStream, metadata);
 
             String name = metadata.get(TikaCoreProperties.RESOURCE_NAME_KEY);
-            File outputFile = null;
+            Path outputFile = null;
             if (name == null) {
-                name = "file" + count++;
+                name = "file_" + count++;
             }
             outputFile = getOutputFile(name, metadata, contentType);
 
 
-            File parent = outputFile.getParentFile();
-            if (!parent.exists()) {
-                if (!parent.mkdirs()) {
-                    throw new IOException("unable to create directory \"" + 
parent + "\"");
-                }
+            Path parent = outputFile.getParent();
+            if (parent != null && ! Files.isDirectory(parent)) {
+                Files.createDirectories(parent);
             }
             System.out.println("Extracting '" + name + "' (" + contentType + 
") to " + outputFile);
 
-            try (FileOutputStream os = new FileOutputStream(outputFile)) {
+            try (OutputStream os = Files.newOutputStream(outputFile)) {
                 if (embeddedStreamTranslator.shouldTranslate(inputStream, 
metadata)) {
                     try (InputStream translated = 
embeddedStreamTranslator.translate(inputStream, metadata)) {
                         IOUtils.copy(translated, os);
@@ -1121,7 +1118,7 @@ public class TikaCLI {
             }
         }
 
-        private File getOutputFile(String name, Metadata metadata, MediaType 
contentType) {
+        private Path getOutputFile(String name, Metadata metadata, MediaType 
contentType) throws IOException {
             String ext = getExtension(contentType);
             if (name.indexOf('.') == -1 && contentType != null) {
                 name += ext;
@@ -1148,13 +1145,14 @@ public class TikaCLI {
             if (prefixLength > -1) {
                 normalizedName = normalizedName.substring(prefixLength);
             }
-            File outputFile = new File(extractDir, normalizedName);
+            Path outputFile = extractDir.resolve(normalizedName);
             //if file already exists, prepend uuid
-            if (outputFile.exists()) {
+            if (Files.exists(outputFile)) {
                 String fileName = FilenameUtils.getName(normalizedName);
-                outputFile = new File(extractDir, UUID
-                        .randomUUID()
-                        .toString() + "-" + fileName);
+                outputFile = extractDir.resolve( UUID.randomUUID() + "-" + 
fileName);
+            }
+            if (! 
outputFile.toAbsolutePath().normalize().startsWith(extractDir.toAbsolutePath().normalize()))
 {
+                throw new IOException("Path traversal?!: " + 
outputFile.toAbsolutePath());
             }
             return outputFile;
         }
@@ -1171,7 +1169,7 @@ public class TikaCLI {
                     return ext;
                 }
             } catch (MimeTypeException e) {
-                e.printStackTrace();
+                LOG.info("bad mime type?", e);
             }
             return ".bin";
 
diff --git a/tika-app/src/test/java/org/apache/tika/cli/TikaCLITest.java b/tika-app/src/test/java/org/apache/tika/cli/TikaCLITest.java
index ce6b91209..099c32921 100644
--- a/tika-app/src/test/java/org/apache/tika/cli/TikaCLITest.java
+++ b/tika-app/src/test/java/org/apache/tika/cli/TikaCLITest.java
@@ -295,7 +295,7 @@ public class TikaCLITest {
 
     @Test
     public void testExtractSimple() throws Exception {
-        String[] expectedChildren = new String[]{"MBD002B040A.cdx", 
"file4.png", "MBD002B0FA6.bin", "MBD00262FE3.txt", "file0.emf"};
+        String[] expectedChildren = new String[]{"MBD002B040A.cdx", 
"file_4.png", "MBD002B0FA6.bin", "MBD00262FE3.txt", "file_0.emf"};
         testExtract("/coffee.xls", expectedChildren, 8);
     }
 
diff --git a/tika-pipes/tika-emitters/tika-emitter-fs/src/main/java/org/apache/tika/pipes/emitter/fs/FileSystemEmitter.java b/tika-pipes/tika-emitters/tika-emitter-fs/src/main/java/org/apache/tika/pipes/emitter/fs/FileSystemEmitter.java
index 9142c9b7b..0da31981c 100644
--- a/tika-pipes/tika-emitters/tika-emitter-fs/src/main/java/org/apache/tika/pipes/emitter/fs/FileSystemEmitter.java
+++ b/tika-pipes/tika-emitters/tika-emitter-fs/src/main/java/org/apache/tika/pipes/emitter/fs/FileSystemEmitter.java
@@ -76,20 +76,23 @@ public class FileSystemEmitter extends AbstractEmitter 
implements StreamEmitter
     @Override
     public void emit(String emitKey, List<Metadata> metadataList, ParseContext 
parseContext) throws IOException, TikaEmitterException {
         Path output;
-        if (metadataList == null || metadataList.size() == 0) {
+        if (metadataList == null || metadataList.isEmpty()) {
             throw new TikaEmitterException("metadata list must not be null or 
of size 0");
         }
 
-        if (fileExtension != null && fileExtension.length() > 0) {
+        if (fileExtension != null && ! fileExtension.isEmpty()) {
             emitKey += "." + fileExtension;
         }
         if (basePath != null) {
             output = basePath.resolve(emitKey);
+            if 
(!output.toAbsolutePath().normalize().startsWith(basePath.toAbsolutePath().normalize()))
 {
+                throw new TikaEmitterException("path traversal?! " + 
output.toAbsolutePath());
+            }
         } else {
             output = Paths.get(emitKey);
         }
 
-        if (!Files.isDirectory(output.getParent())) {
+        if (output.getParent() != null && 
!Files.isDirectory(output.getParent())) {
             Files.createDirectories(output.getParent());
         }
         try (Writer writer = Files.newBufferedWriter(output, 
StandardCharsets.UTF_8)) {

Reply via email to