This is an automated email from the ASF dual-hosted git repository.

ndipiazza pushed a commit to branch TIKA-4851-revert
in repository https://gitbox.apache.org/repos/asf/tika.git

commit 771b80c7ef9241799d85379ea25dce291fc99ece
Author: Nicholas DiPiazza <[email protected]>
AuthorDate: Fri Dec 19 13:38:22 2025 -0600

    Revert "TIKA-4581 - fix faulty logic in PipesServer and add intermediate results for concatenated parsing (#2469)"

    This reverts commit 1a3d4828f48c742b76b9eb72643d33b4b3de0ab7.
---
 .../org/apache/tika/pipes/core/server/ParseHandler.java |  6 ++----
 .../org/apache/tika/pipes/core/server/PipesServer.java  | 14 +++++++++-----
 2 files changed, 11 insertions(+), 9 deletions(-)

diff --git a/tika-pipes/tika-pipes-core/src/main/java/org/apache/tika/pipes/core/server/ParseHandler.java b/tika-pipes/tika-pipes-core/src/main/java/org/apache/tika/pipes/core/server/ParseHandler.java
index a395677c9..7e670c63a 100644
--- a/tika-pipes/tika-pipes-core/src/main/java/org/apache/tika/pipes/core/server/ParseHandler.java
+++ b/tika-pipes/tika-pipes-core/src/main/java/org/apache/tika/pipes/core/server/ParseHandler.java
@@ -169,7 +169,7 @@ class ParseHandler {
 
     public List<Metadata> parseConcatenated(FetchEmitTuple fetchEmitTuple,
                                             HandlerConfig handlerConfig, TikaInputStream stream,
-                                            Metadata metadata, ParseContext parseContext) throws InterruptedException {
+                                            Metadata metadata, ParseContext parseContext) {
 
         ContentHandlerFactory contentHandlerFactory =
                 new BasicContentHandlerFactory(handlerConfig.getType(),
@@ -193,9 +193,7 @@ class ParseHandler {
         String containerException = null;
         long start = System.currentTimeMillis();
         preParse(fetchEmitTuple, stream, metadata, parseContext);
-        //queue better be empty. we deserve an exception if not
-        intermediateResult.add(metadata);
-        countDownLatch.await();
+        //TODO -- add intermediate
         try {
             autoDetectParser.parse(stream, handler, metadata, parseContext);
         } catch (SAXException e) {
diff --git a/tika-pipes/tika-pipes-core/src/main/java/org/apache/tika/pipes/core/server/PipesServer.java b/tika-pipes/tika-pipes-core/src/main/java/org/apache/tika/pipes/core/server/PipesServer.java
index dd09db768..94c66477e 100644
--- a/tika-pipes/tika-pipes-core/src/main/java/org/apache/tika/pipes/core/server/PipesServer.java
+++ b/tika-pipes/tika-pipes-core/src/main/java/org/apache/tika/pipes/core/server/PipesServer.java
@@ -474,11 +474,15 @@ public class PipesServer implements AutoCloseable {
             // User doesn't want container documents digested
             this.digester = null;
         }
-
-        // If the user hasn't configured an embedded document extractor, set up the
-        // RUnpackExtractorFactory
-        if (autoDetectParser.getAutoDetectParserConfig().getEmbeddedDocumentExtractorFactory() == null) {
-            autoDetectParser.getAutoDetectParserConfig().setEmbeddedDocumentExtractorFactory(new RUnpackExtractorFactory());
+        if (this.digester != null) {
+            // If the user hasn't configured an embedded document extractor, set up the
+            // RUnpackExtractorFactory
+            if (autoDetectParser.getAutoDetectParserConfig()
+                    .getEmbeddedDocumentExtractorFactory() == null) {
+                autoDetectParser
+                        .getAutoDetectParserConfig().setEmbeddedDocumentExtractorFactory(
+                                new RUnpackExtractorFactory());
+            }
         }
         this.detector = this.autoDetectParser.getDetector();
         this.rMetaParser = new RecursiveParserWrapper(autoDetectParser);
