This is an automated email from the ASF dual-hosted git repository.
tallison pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/tika.git
The following commit(s) were added to refs/heads/main by this push:
new 1a3d4828f TIKA-4581 - fix faulty logic in PipesServer and add
intermediate results for concatenated parsing (#2469)
1a3d4828f is described below
commit 1a3d4828f48c742b76b9eb72643d33b4b3de0ab7
Author: Tim Allison <[email protected]>
AuthorDate: Wed Dec 17 20:06:56 2025 -0500
TIKA-4581 - fix faulty logic in PipesServer and add intermediate results
for concatenated parsing (#2469)
---
.../org/apache/tika/pipes/core/server/ParseHandler.java | 6 ++++--
.../org/apache/tika/pipes/core/server/PipesServer.java | 14 +++++---------
2 files changed, 9 insertions(+), 11 deletions(-)
diff --git a/tika-pipes/tika-pipes-core/src/main/java/org/apache/tika/pipes/core/server/ParseHandler.java b/tika-pipes/tika-pipes-core/src/main/java/org/apache/tika/pipes/core/server/ParseHandler.java
index 7e670c63a..a395677c9 100644
--- a/tika-pipes/tika-pipes-core/src/main/java/org/apache/tika/pipes/core/server/ParseHandler.java
+++ b/tika-pipes/tika-pipes-core/src/main/java/org/apache/tika/pipes/core/server/ParseHandler.java
@@ -169,7 +169,7 @@ class ParseHandler {
public List<Metadata> parseConcatenated(FetchEmitTuple fetchEmitTuple,
HandlerConfig handlerConfig,
TikaInputStream stream,
- Metadata metadata, ParseContext parseContext) {
+ Metadata metadata, ParseContext parseContext) throws InterruptedException {
ContentHandlerFactory contentHandlerFactory =
new BasicContentHandlerFactory(handlerConfig.getType(),
@@ -193,7 +193,9 @@ class ParseHandler {
String containerException = null;
long start = System.currentTimeMillis();
preParse(fetchEmitTuple, stream, metadata, parseContext);
- //TODO -- add intermediate
+ //queue better be empty. we deserve an exception if not
+ intermediateResult.add(metadata);
+ countDownLatch.await();
try {
autoDetectParser.parse(stream, handler, metadata, parseContext);
} catch (SAXException e) {
diff --git a/tika-pipes/tika-pipes-core/src/main/java/org/apache/tika/pipes/core/server/PipesServer.java b/tika-pipes/tika-pipes-core/src/main/java/org/apache/tika/pipes/core/server/PipesServer.java
index 94c66477e..dd09db768 100644
--- a/tika-pipes/tika-pipes-core/src/main/java/org/apache/tika/pipes/core/server/PipesServer.java
+++ b/tika-pipes/tika-pipes-core/src/main/java/org/apache/tika/pipes/core/server/PipesServer.java
@@ -474,15 +474,11 @@ public class PipesServer implements AutoCloseable {
// User doesn't want container documents digested
this.digester = null;
}
- if (this.digester != null) {
- // If the user hasn't configured an embedded document extractor, set up the
- // RUnpackExtractorFactory
- if (autoDetectParser.getAutoDetectParserConfig()
- .getEmbeddedDocumentExtractorFactory() == null) {
- autoDetectParser
- .getAutoDetectParserConfig().setEmbeddedDocumentExtractorFactory(
- new RUnpackExtractorFactory());
- }
+
+ // If the user hasn't configured an embedded document extractor, set up the
+ // RUnpackExtractorFactory
+ if (autoDetectParser.getAutoDetectParserConfig().getEmbeddedDocumentExtractorFactory() == null) {
+ autoDetectParser.getAutoDetectParserConfig().setEmbeddedDocumentExtractorFactory(new RUnpackExtractorFactory());
}
this.detector = this.autoDetectParser.getDetector();
this.rMetaParser = new RecursiveParserWrapper(autoDetectParser);