This is an automated email from the ASF dual-hosted git repository.
tallison pushed a commit to branch branch_3x
in repository https://gitbox.apache.org/repos/asf/tika.git
The following commit(s) were added to refs/heads/branch_3x by this push:
new 0c89f4b89 Simplify path naming
0c89f4b89 is described below
commit 0c89f4b894395af57d441e0cbebdf2cb6657416d
Author: tallison <[email protected]>
AuthorDate: Mon Aug 4 11:40:29 2025 -0400
Simplify path naming
(cherry picked from commit 6e224a1fd33196e908b56f69db9e978f096fbc54)
# Conflicts:
# tika-app/src/main/java/org/apache/tika/cli/TikaCLI.java
---
.../src/main/java/org/apache/tika/cli/TikaCLI.java | 36 ++++++++++------------
.../test/java/org/apache/tika/cli/TikaCLITest.java | 2 +-
.../tika/pipes/emitter/fs/FileSystemEmitter.java | 9 ++++--
3 files changed, 24 insertions(+), 23 deletions(-)
diff --git a/tika-app/src/main/java/org/apache/tika/cli/TikaCLI.java b/tika-app/src/main/java/org/apache/tika/cli/TikaCLI.java
index 41eccad3c..3c1af1c65 100644
--- a/tika-app/src/main/java/org/apache/tika/cli/TikaCLI.java
+++ b/tika-app/src/main/java/org/apache/tika/cli/TikaCLI.java
@@ -21,7 +21,6 @@ import static java.nio.charset.StandardCharsets.UTF_8;
import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
-import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
@@ -124,7 +123,7 @@ public class TikaCLI {
return new DefaultHandler();
}
};
- private File extractDir = new File(".");
+ private Path extractDir = Paths.get(".");
private ParseContext context;
private Detector detector;
private Parser parser;
@@ -339,7 +338,7 @@ public class TikaCLI {
pdfParserConfig.setExtractIncrementalUpdateInfo(true);
pdfParserConfig.setParseIncrementalUpdates(true);
String warn = "As a convenience, TikaCLI has turned on extraction
of\n" +
- "inline images and incremental updates for the PDFParser
(TIKA-2374, " +
+ "inline images and parsing of incremental updates for the
PDFParser (TIKA-2374, " +
"TIKA-4017 and TIKA-4354).\n" +
"This is not the default behavior in Tika generally or in
tika-server.";
LOG.info(warn);
@@ -441,7 +440,7 @@ public class TikaCLI {
if (dirPath.isEmpty()) {
dirPath = ".";
}
- extractDir = new File(dirPath);
+ extractDir = Paths.get(dirPath);
} else if (arg.equals("-z") || arg.equals("--extract")) {
type = NO_OUTPUT;
context.set(EmbeddedDocumentExtractor.class, new
FileEmbeddedDocumentExtractor());
@@ -1089,22 +1088,20 @@ public class TikaCLI {
MediaType contentType = detector.detect(inputStream, metadata);
String name = metadata.get(TikaCoreProperties.RESOURCE_NAME_KEY);
- File outputFile = null;
+ Path outputFile = null;
if (name == null) {
- name = "file" + count++;
+ name = "file_" + count++;
}
outputFile = getOutputFile(name, metadata, contentType);
- File parent = outputFile.getParentFile();
- if (!parent.exists()) {
- if (!parent.mkdirs()) {
- throw new IOException("unable to create directory \"" +
parent + "\"");
- }
+ Path parent = outputFile.getParent();
+ if (parent != null && ! Files.isDirectory(parent)) {
+ Files.createDirectories(parent);
}
System.out.println("Extracting '" + name + "' (" + contentType +
") to " + outputFile);
- try (FileOutputStream os = new FileOutputStream(outputFile)) {
+ try (OutputStream os = Files.newOutputStream(outputFile)) {
if (embeddedStreamTranslator.shouldTranslate(inputStream,
metadata)) {
try (InputStream translated =
embeddedStreamTranslator.translate(inputStream, metadata)) {
IOUtils.copy(translated, os);
@@ -1121,7 +1118,7 @@ public class TikaCLI {
}
}
- private File getOutputFile(String name, Metadata metadata, MediaType
contentType) {
+ private Path getOutputFile(String name, Metadata metadata, MediaType
contentType) throws IOException {
String ext = getExtension(contentType);
if (name.indexOf('.') == -1 && contentType != null) {
name += ext;
@@ -1148,13 +1145,14 @@ public class TikaCLI {
if (prefixLength > -1) {
normalizedName = normalizedName.substring(prefixLength);
}
- File outputFile = new File(extractDir, normalizedName);
+ Path outputFile = extractDir.resolve(normalizedName);
//if file already exists, prepend uuid
- if (outputFile.exists()) {
+ if (Files.exists(outputFile)) {
String fileName = FilenameUtils.getName(normalizedName);
- outputFile = new File(extractDir, UUID
- .randomUUID()
- .toString() + "-" + fileName);
+ outputFile = extractDir.resolve( UUID.randomUUID() + "-" +
fileName);
+ }
+ if (!
outputFile.toAbsolutePath().normalize().startsWith(extractDir.toAbsolutePath().normalize()))
{
+ throw new IOException("Path traversal?!: " +
outputFile.toAbsolutePath());
}
return outputFile;
}
@@ -1171,7 +1169,7 @@ public class TikaCLI {
return ext;
}
} catch (MimeTypeException e) {
- e.printStackTrace();
+ LOG.info("bad mime type?", e);
}
return ".bin";
diff --git a/tika-app/src/test/java/org/apache/tika/cli/TikaCLITest.java b/tika-app/src/test/java/org/apache/tika/cli/TikaCLITest.java
index ce6b91209..099c32921 100644
--- a/tika-app/src/test/java/org/apache/tika/cli/TikaCLITest.java
+++ b/tika-app/src/test/java/org/apache/tika/cli/TikaCLITest.java
@@ -295,7 +295,7 @@ public class TikaCLITest {
@Test
public void testExtractSimple() throws Exception {
- String[] expectedChildren = new String[]{"MBD002B040A.cdx",
"file4.png", "MBD002B0FA6.bin", "MBD00262FE3.txt", "file0.emf"};
+ String[] expectedChildren = new String[]{"MBD002B040A.cdx",
"file_4.png", "MBD002B0FA6.bin", "MBD00262FE3.txt", "file_0.emf"};
testExtract("/coffee.xls", expectedChildren, 8);
}
diff --git a/tika-pipes/tika-emitters/tika-emitter-fs/src/main/java/org/apache/tika/pipes/emitter/fs/FileSystemEmitter.java b/tika-pipes/tika-emitters/tika-emitter-fs/src/main/java/org/apache/tika/pipes/emitter/fs/FileSystemEmitter.java
index 9142c9b7b..0da31981c 100644
--- a/tika-pipes/tika-emitters/tika-emitter-fs/src/main/java/org/apache/tika/pipes/emitter/fs/FileSystemEmitter.java
+++ b/tika-pipes/tika-emitters/tika-emitter-fs/src/main/java/org/apache/tika/pipes/emitter/fs/FileSystemEmitter.java
@@ -76,20 +76,23 @@ public class FileSystemEmitter extends AbstractEmitter implements StreamEmitter
@Override
public void emit(String emitKey, List<Metadata> metadataList, ParseContext
parseContext) throws IOException, TikaEmitterException {
Path output;
- if (metadataList == null || metadataList.size() == 0) {
+ if (metadataList == null || metadataList.isEmpty()) {
throw new TikaEmitterException("metadata list must not be null or
of size 0");
}
- if (fileExtension != null && fileExtension.length() > 0) {
+ if (fileExtension != null && ! fileExtension.isEmpty()) {
emitKey += "." + fileExtension;
}
if (basePath != null) {
output = basePath.resolve(emitKey);
+ if
(!output.toAbsolutePath().normalize().startsWith(basePath.toAbsolutePath().normalize()))
{
+ throw new TikaEmitterException("path traversal?! " +
output.toAbsolutePath());
+ }
} else {
output = Paths.get(emitKey);
}
- if (!Files.isDirectory(output.getParent())) {
+ if (output.getParent() != null &&
!Files.isDirectory(output.getParent())) {
Files.createDirectories(output.getParent());
}
try (Writer writer = Files.newBufferedWriter(output,
StandardCharsets.UTF_8)) {