This is an automated email from the ASF dual-hosted git repository.

tallison pushed a commit to branch TIKA-4581-rm-metadata-filter-where-unneeded
in repository https://gitbox.apache.org/repos/asf/tika.git

commit 3fd062d2c5e19368aaac86153b327c044fc83f8f
Author: tallison <[email protected]>
AuthorDate: Wed Dec 17 17:20:53 2025 -0500

    TIKA-4581 - rm metadata filter where it isn't needed any more
---
 .../src/main/java/org/apache/tika/cli/TikaCLI.java |  3 +-
 .../tika/sax/RecursiveParserWrapperHandler.java    | 22 +----------
 .../tika/parser/RecursiveParserWrapperTest.java    | 46 ----------------------
 .../apache/tika/parser/image/JpegParserTest.java   |  4 +-
 .../core/resource/RecursiveMetadataResource.java   |  8 +---
 5 files changed, 6 insertions(+), 77 deletions(-)

diff --git a/tika-app/src/main/java/org/apache/tika/cli/TikaCLI.java b/tika-app/src/main/java/org/apache/tika/cli/TikaCLI.java
index 411bd070a..a0f0ddb57 100644
--- a/tika-app/src/main/java/org/apache/tika/cli/TikaCLI.java
+++ b/tika-app/src/main/java/org/apache/tika/cli/TikaCLI.java
@@ -553,8 +553,7 @@ public class TikaCLI {
     private void handleRecursiveJson(URL url, OutputStream output) throws IOException, SAXException, TikaException {
         Metadata metadata = new Metadata();
         RecursiveParserWrapper wrapper = new RecursiveParserWrapper(parser);
-        RecursiveParserWrapperHandler handler = new RecursiveParserWrapperHandler(getContentHandlerFactory(type), -1,
-                tikaLoader.loadMetadataFilters());
+        RecursiveParserWrapperHandler handler = new RecursiveParserWrapperHandler(getContentHandlerFactory(type), -1);
         try (TikaInputStream tis = TikaInputStream.get(url, metadata)) {
             wrapper.parse(tis, handler, metadata, context);
         }
diff --git a/tika-core/src/main/java/org/apache/tika/sax/RecursiveParserWrapperHandler.java b/tika-core/src/main/java/org/apache/tika/sax/RecursiveParserWrapperHandler.java
index b65fdbd61..154d5733a 100644
--- a/tika-core/src/main/java/org/apache/tika/sax/RecursiveParserWrapperHandler.java
+++ b/tika-core/src/main/java/org/apache/tika/sax/RecursiveParserWrapperHandler.java
@@ -26,11 +26,8 @@ import org.xml.sax.ContentHandler;
 import org.xml.sax.SAXException;
 import org.xml.sax.helpers.DefaultHandler;
 
-import org.apache.tika.exception.TikaException;
 import org.apache.tika.metadata.Metadata;
 import org.apache.tika.metadata.TikaCoreProperties;
-import org.apache.tika.metadata.filter.MetadataFilter;
-import org.apache.tika.metadata.filter.NoOpFilter;
 import org.apache.tika.parser.RecursiveParserWrapper;
 import org.apache.tika.utils.ParserUtils;
 
@@ -51,13 +48,12 @@ import org.apache.tika.utils.ParserUtils;
 public class RecursiveParserWrapperHandler extends AbstractRecursiveParserWrapperHandler {
 
     protected final List<Metadata> metadataList = new LinkedList<>();
-    private final MetadataFilter metadataFilter;
 
     /**
      * Create a handler with no limit on the number of embedded resources
      */
     public RecursiveParserWrapperHandler(ContentHandlerFactory contentHandlerFactory) {
-        this(contentHandlerFactory, -1, NoOpFilter.NOOP_FILTER);
+        super(contentHandlerFactory, -1);
     }
 
     /**
@@ -68,13 +64,7 @@ public class RecursiveParserWrapperHandler extends AbstractRecursiveParserWrappe
      */
     public RecursiveParserWrapperHandler(ContentHandlerFactory contentHandlerFactory,
                                          int maxEmbeddedResources) {
-        this(contentHandlerFactory, maxEmbeddedResources, NoOpFilter.NOOP_FILTER);
-    }
-
-    public RecursiveParserWrapperHandler(ContentHandlerFactory contentHandlerFactory,
-                                         int maxEmbeddedResources, MetadataFilter metadataFilter) {
         super(contentHandlerFactory, maxEmbeddedResources);
-        this.metadataFilter = metadataFilter;
     }
 
     /**
@@ -102,11 +92,6 @@ public class RecursiveParserWrapperHandler extends AbstractRecursiveParserWrappe
             throws SAXException {
         super.endEmbeddedDocument(contentHandler, metadata);
         addContent(contentHandler, metadata);
-        try {
-            metadataFilter.filter(List.of(metadata));
-        } catch (TikaException e) {
-            throw new SAXException(e);
-        }
 
         if (metadata.size() > 0) {
             metadataList.add(ParserUtils.cloneMetadata(metadata));
@@ -122,11 +107,6 @@ public class RecursiveParserWrapperHandler extends AbstractRecursiveParserWrappe
     public void endDocument(ContentHandler contentHandler, Metadata metadata) throws SAXException {
         super.endDocument(contentHandler, metadata);
         addContent(contentHandler, metadata);
-        try {
-            metadataFilter.filter(List.of(metadata));
-        } catch (TikaException e) {
-            throw new SAXException(e);
-        }
         if (metadata.size() > 0) {
             metadataList.add(0, ParserUtils.cloneMetadata(metadata));
         }
diff --git a/tika-parsers/tika-parsers-standard/tika-parsers-standard-package/src/test/java/org/apache/tika/parser/RecursiveParserWrapperTest.java b/tika-parsers/tika-parsers-standard/tika-parsers-standard-package/src/test/java/org/apache/tika/parser/RecursiveParserWrapperTest.java
index 3cabccdfa..e009d65b9 100644
--- a/tika-parsers/tika-parsers-standard/tika-parsers-standard-package/src/test/java/org/apache/tika/parser/RecursiveParserWrapperTest.java
+++ b/tika-parsers/tika-parsers-standard/tika-parsers-standard-package/src/test/java/org/apache/tika/parser/RecursiveParserWrapperTest.java
@@ -20,7 +20,6 @@ import static org.junit.jupiter.api.Assertions.assertEquals;
 import static org.junit.jupiter.api.Assertions.assertFalse;
 import static org.junit.jupiter.api.Assertions.assertNull;
 import static org.junit.jupiter.api.Assertions.assertTrue;
-import static org.junit.jupiter.api.Assertions.fail;
 
 import java.io.IOException;
 import java.io.InputStream;
@@ -33,16 +32,13 @@ import org.apache.commons.io.input.ClosedInputStream;
 import org.apache.commons.io.input.ProxyInputStream;
 import org.junit.jupiter.api.Test;
 
-import org.apache.tika.TikaLoaderHelper;
 import org.apache.tika.TikaTest;
-import org.apache.tika.config.loader.TikaLoader;
 import org.apache.tika.digest.DigestDef;
 import org.apache.tika.digest.Digester;
 import org.apache.tika.exception.TikaException;
 import org.apache.tika.io.TikaInputStream;
 import org.apache.tika.metadata.Metadata;
 import org.apache.tika.metadata.TikaCoreProperties;
-import org.apache.tika.metadata.filter.MetadataFilter;
 import org.apache.tika.parser.digestutils.CommonsDigester;
 import org.apache.tika.sax.AbstractRecursiveParserWrapperHandler;
 import org.apache.tika.sax.BasicContentHandlerFactory;
@@ -393,48 +389,6 @@ public class RecursiveParserWrapperTest extends TikaTest {
 
     }
 
-    @Test
-    public void testIncludeFilter() throws Exception {
-        //TIKA-3137
-        ParseContext context = new ParseContext();
-        Metadata metadata = new Metadata();
-        TikaLoader tikaLoader = TikaLoaderHelper.getLoader("TIKA-3137-include.json");
-        Parser p = tikaLoader.loadAutoDetectParser();
-        MetadataFilter metadataFilter = tikaLoader.loadMetadataFilters();
-        RecursiveParserWrapper wrapper = new RecursiveParserWrapper(p, true);
-        String path = "/test-documents/test_recursive_embedded.docx";
-        ContentHandlerFactory contentHandlerFactory =
-                new BasicContentHandlerFactory(BasicContentHandlerFactory.HANDLER_TYPE.TEXT, -1);
-
-        RecursiveParserWrapperHandler handler =
-                new RecursiveParserWrapperHandler(contentHandlerFactory, -1,
-                        metadataFilter);
-        try (TikaInputStream tis = getResourceAsStream(path)) {
-            wrapper.parse(tis, handler, metadata, context);
-        }
-        List<Metadata> metadataList = handler.getMetadataList();
-        assertEquals(5, metadataList.size());
-
-        Set<String> expectedKeys = new HashSet<>();
-        expectedKeys.add("X-TIKA:content");
-        expectedKeys.add("extended-properties:Application");
-        expectedKeys.add("Content-Type");
-        for (Metadata m : metadataList) {
-            if (m.get(Metadata.CONTENT_TYPE).equals("image/emf")) {
-                fail("emf should have been filtered out");
-            }
-            if (m.get(Metadata.CONTENT_TYPE).startsWith("text/plain")) {
-                fail("text/plain should have been filtered out");
-            }
-            assertTrue(m.names().length >= 2);
-            for (String n : m.names()) {
-                if (!expectedKeys.contains(n)) {
-                    fail("didn't expect " + n);
-                }
-            }
-        }
-    }
-
     @SuppressWarnings("deprecation")
     private List<Metadata> getMetadata(Metadata metadata,
                                        ContentHandlerFactory contentHandlerFactory,
diff --git a/tika-parsers/tika-parsers-standard/tika-parsers-standard-package/src/test/java/org/apache/tika/parser/image/JpegParserTest.java b/tika-parsers/tika-parsers-standard/tika-parsers-standard-package/src/test/java/org/apache/tika/parser/image/JpegParserTest.java
index c811b9356..11af0ccef 100644
--- a/tika-parsers/tika-parsers-standard/tika-parsers-standard-package/src/test/java/org/apache/tika/parser/image/JpegParserTest.java
+++ b/tika-parsers/tika-parsers-standard/tika-parsers-standard-package/src/test/java/org/apache/tika/parser/image/JpegParserTest.java
@@ -45,12 +45,12 @@ public class JpegParserTest extends TikaTest {
 
         RecursiveParserWrapper wrapper = new RecursiveParserWrapper(p);
         RecursiveParserWrapperHandler handler =
-                new RecursiveParserWrapperHandler(new BasicContentHandlerFactory(BasicContentHandlerFactory.HANDLER_TYPE.XML, -1), 1000, metadataFilter);
+                new RecursiveParserWrapperHandler(new BasicContentHandlerFactory(BasicContentHandlerFactory.HANDLER_TYPE.XML, -1), 1000);
         try (InputStream is = getResourceAsStream("/test-documents/testJPEG_GEO_2.jpg")) {
             wrapper.parse(TikaInputStream.get(is), handler, new Metadata(), new ParseContext());
         }
         List<Metadata> metadataList = handler.getMetadataList();
-
+        metadataList = metadataFilter.filter(metadataList);
         Metadata metadata = metadataList.get(0);
         // Geo tags should be there with 5dp, and not rounded
         assertEquals("51.575762", metadata.get(Metadata.LATITUDE));
diff --git a/tika-server/tika-server-core/src/main/java/org/apache/tika/server/core/resource/RecursiveMetadataResource.java b/tika-server/tika-server-core/src/main/java/org/apache/tika/server/core/resource/RecursiveMetadataResource.java
index 3f71ae67e..61d6f1440 100644
--- a/tika-server/tika-server-core/src/main/java/org/apache/tika/server/core/resource/RecursiveMetadataResource.java
+++ b/tika-server/tika-server-core/src/main/java/org/apache/tika/server/core/resource/RecursiveMetadataResource.java
@@ -72,9 +72,7 @@ public class RecursiveMetadataResource {
         BasicContentHandlerFactory.HANDLER_TYPE type = handlerConfig.getType();
         RecursiveParserWrapperHandler handler =
                 new RecursiveParserWrapperHandler(new BasicContentHandlerFactory(type, handlerConfig.getWriteLimit(), handlerConfig.isThrowOnWriteLimitReached(), context),
-                        handlerConfig.getMaxEmbeddedResources(), TikaResource
-                        .getTikaLoader()
-                        .loadMetadataFilters());
+                        handlerConfig.getMaxEmbeddedResources());
         try {
             TikaResource.parse(wrapper, LOG, "/rmeta", tis, handler, metadata, context);
         } catch (TikaServerParseException e) {
@@ -179,9 +177,7 @@ public class RecursiveMetadataResource {
         BasicContentHandlerFactory.HANDLER_TYPE type = handlerConfig.getType();
         RecursiveParserWrapperHandler handler =
                 new RecursiveParserWrapperHandler(new BasicContentHandlerFactory(type, handlerConfig.getWriteLimit(), handlerConfig.isThrowOnWriteLimitReached(), context),
-                        handlerConfig.getMaxEmbeddedResources(), TikaResource
-                        .getTikaLoader()
-                        .loadMetadataFilters());
+                        handlerConfig.getMaxEmbeddedResources());
         try {
             TikaResource.parse(wrapper, LOG, "/rmeta/config", tis, handler, metadata, context);
         } catch (TikaServerParseException e) {

Reply via email to