This is an automated email from the ASF dual-hosted git repository.

tallison pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/tika.git


The following commit(s) were added to refs/heads/main by this push:
     new 1a3d4828f TIKA-4581 - fix faulty logic in PipesServer and add intermediate results for concatenated parsing (#2469)
1a3d4828f is described below

commit 1a3d4828f48c742b76b9eb72643d33b4b3de0ab7
Author: Tim Allison <[email protected]>
AuthorDate: Wed Dec 17 20:06:56 2025 -0500

    TIKA-4581 - fix faulty logic in PipesServer and add intermediate results for concatenated parsing (#2469)
---
 .../org/apache/tika/pipes/core/server/ParseHandler.java    |  6 ++++--
 .../org/apache/tika/pipes/core/server/PipesServer.java     | 14 +++++---------
 2 files changed, 9 insertions(+), 11 deletions(-)

diff --git a/tika-pipes/tika-pipes-core/src/main/java/org/apache/tika/pipes/core/server/ParseHandler.java b/tika-pipes/tika-pipes-core/src/main/java/org/apache/tika/pipes/core/server/ParseHandler.java
index 7e670c63a..a395677c9 100644
--- a/tika-pipes/tika-pipes-core/src/main/java/org/apache/tika/pipes/core/server/ParseHandler.java
+++ b/tika-pipes/tika-pipes-core/src/main/java/org/apache/tika/pipes/core/server/ParseHandler.java
@@ -169,7 +169,7 @@ class ParseHandler {
 
     public List<Metadata> parseConcatenated(FetchEmitTuple fetchEmitTuple,
                                              HandlerConfig handlerConfig, TikaInputStream stream,
-                                             Metadata metadata, ParseContext parseContext) {
+                                             Metadata metadata, ParseContext parseContext) throws InterruptedException {
 
         ContentHandlerFactory contentHandlerFactory =
                 new BasicContentHandlerFactory(handlerConfig.getType(),
@@ -193,7 +193,9 @@ class ParseHandler {
         String containerException = null;
         long start = System.currentTimeMillis();
         preParse(fetchEmitTuple, stream, metadata, parseContext);
-        //TODO -- add intermediate
+        //queue better be empty. we deserve an exception if not
+        intermediateResult.add(metadata);
+        countDownLatch.await();
         try {
             autoDetectParser.parse(stream, handler, metadata, parseContext);
         } catch (SAXException e) {
diff --git a/tika-pipes/tika-pipes-core/src/main/java/org/apache/tika/pipes/core/server/PipesServer.java b/tika-pipes/tika-pipes-core/src/main/java/org/apache/tika/pipes/core/server/PipesServer.java
index 94c66477e..dd09db768 100644
--- a/tika-pipes/tika-pipes-core/src/main/java/org/apache/tika/pipes/core/server/PipesServer.java
+++ b/tika-pipes/tika-pipes-core/src/main/java/org/apache/tika/pipes/core/server/PipesServer.java
@@ -474,15 +474,11 @@ public class PipesServer implements AutoCloseable {
             // User doesn't want container documents digested
             this.digester = null;
         }
-        if (this.digester != null) {
-            // If the user hasn't configured an embedded document extractor, set up the
-            // RUnpackExtractorFactory
-            if (autoDetectParser.getAutoDetectParserConfig()
-                    .getEmbeddedDocumentExtractorFactory() == null) {
-                autoDetectParser
-                        .getAutoDetectParserConfig().setEmbeddedDocumentExtractorFactory(
-                                new RUnpackExtractorFactory());
-            }
+
+        // If the user hasn't configured an embedded document extractor, set up the
+        // RUnpackExtractorFactory
+        if (autoDetectParser.getAutoDetectParserConfig().getEmbeddedDocumentExtractorFactory() == null) {
+                autoDetectParser.getAutoDetectParserConfig().setEmbeddedDocumentExtractorFactory(new RUnpackExtractorFactory());
         }
         this.detector = this.autoDetectParser.getDetector();
         this.rMetaParser = new RecursiveParserWrapper(autoDetectParser);

Reply via email to