This is an automated email from the ASF dual-hosted git repository.

tallison pushed a commit to branch TIKA-4585-simplify-serialization
in repository https://gitbox.apache.org/repos/asf/tika.git

commit 05e3fc4d9056330f51002aee96519970127bea49
Author: tallison <[email protected]>
AuthorDate: Sat Dec 20 23:29:18 2025 -0500

    TIKA-4585 -- further progress
---
 .../test/java/org/apache/tika/cli/TikaCLITest.java | 13 +++---
 .../java/org/apache/tika/parser/ParseContext.java  | 13 +++++-
 .../filter/AttachmentCountingListFilter.java       | 47 ++++++++++++++++++++++
 .../tika/metadata/filter/MockUpperCaseFilter.java  | 40 ++++++++++++++++++
 .../tika/server/core/resource/TikaResource.java    | 14 ++++++-
 .../core/TikaServerPipesIntegrationTest.java       |  2 +-
 .../standard/RecursiveMetadataResourceTest.java    |  5 ++-
 .../tika/server/standard/TikaParsersTest.java      | 10 +++--
 .../apache/tika/server/standard/TikaPipesTest.java |  4 +-
 9 files changed, 128 insertions(+), 20 deletions(-)

diff --git a/tika-app/src/test/java/org/apache/tika/cli/TikaCLITest.java b/tika-app/src/test/java/org/apache/tika/cli/TikaCLITest.java
index 73e167d60..f09338059 100644
--- a/tika-app/src/test/java/org/apache/tika/cli/TikaCLITest.java
+++ b/tika-app/src/test/java/org/apache/tika/cli/TikaCLITest.java
@@ -38,8 +38,6 @@ import java.util.HashSet;
 import java.util.List;
 import java.util.Set;
 
-import org.jetbrains.annotations.NotNull;
-import org.jetbrains.annotations.Nullable;
 import org.junit.jupiter.api.AfterEach;
 import org.junit.jupiter.api.BeforeEach;
 import org.junit.jupiter.api.Disabled;
@@ -237,7 +235,7 @@ public class TikaCLITest {
     public void testJsonMetadataPrettyPrintOutput() throws Exception {
         String json = getParamOutContent("--json", "-r", resourcePrefix + "testJsonMultipleInts.html");
 
-        assertTrue(json.contains("org.apache.tika.parser.CompositeParser\", \"org.apache.tika.parser.html.JSoupParser"));
+        assertTrue(json.contains("org.apache.tika.parser.DefaultParser\", \"org.apache.tika.parser.html.JSoupParser"));
         //test pretty-print alphabetic sort of keys
         int enc = json.indexOf("\"Content-Encoding\"");
         int fb = json.indexOf("fb:admins");
@@ -413,22 +411,23 @@ public class TikaCLITest {
         final Set<String> names = new HashSet<>();
         Files.walkFileTree(extractDir, new FileVisitor<Path>() {
             @Override
-            public @NotNull FileVisitResult preVisitDirectory(Path path, @NotNull BasicFileAttributes basicFileAttributes) throws IOException {
+            public FileVisitResult preVisitDirectory(Path path, BasicFileAttributes basicFileAttributes) throws IOException {
                 return FileVisitResult.CONTINUE;
             }
 
-            public @NotNull FileVisitResult visitFile(Path path, @NotNull BasicFileAttributes basicFileAttributes) throws IOException {
+            @Override
+            public FileVisitResult visitFile(Path path, BasicFileAttributes basicFileAttributes) throws IOException {
                 names.add(extractDir.relativize(path).toString().replace('\\', '/'));
                 return FileVisitResult.CONTINUE;
             }
 
             @Override
-            public @NotNull FileVisitResult visitFileFailed(Path path, @NotNull IOException e) throws IOException {
+            public FileVisitResult visitFileFailed(Path path, IOException e) throws IOException {
                 return FileVisitResult.CONTINUE;
             }
 
             @Override
-            public @NotNull FileVisitResult postVisitDirectory(Path path, @Nullable IOException e) throws IOException {
+            public FileVisitResult postVisitDirectory(Path path, IOException e) throws IOException {
                 return FileVisitResult.CONTINUE;
             }
         });
diff --git a/tika-core/src/main/java/org/apache/tika/parser/ParseContext.java b/tika-core/src/main/java/org/apache/tika/parser/ParseContext.java
index 137fa9bca..c4587c1fd 100644
--- a/tika-core/src/main/java/org/apache/tika/parser/ParseContext.java
+++ b/tika-core/src/main/java/org/apache/tika/parser/ParseContext.java
@@ -51,8 +51,9 @@ public class ParseContext implements Serializable {
     /**
      * Cache of resolved objects from jsonConfigs, keyed by component name.
      * This is ignored during serialization to preserve round-trip fidelity.
+     * Note: Not final because Java serialization bypasses constructor initialization.
      */
-    private final transient Map<String, Object> resolvedConfigs = new HashMap<>();
+    private transient Map<String, Object> resolvedConfigs = new HashMap<>();
 
     /**
      * Adds the given value to the context as an implementation of the given
@@ -120,7 +121,9 @@ public class ParseContext implements Serializable {
             jsonConfigs.put(name, config);
         } else {
             jsonConfigs.remove(name);
-            resolvedConfigs.remove(name);
+            if (resolvedConfigs != null) {
+                resolvedConfigs.remove(name);
+            }
         }
     }
 
@@ -178,6 +181,9 @@ public class ParseContext implements Serializable {
      */
     @SuppressWarnings("unchecked")
     public <T> T getResolvedConfig(String name) {
+        if (resolvedConfigs == null) {
+            return null;
+        }
         return (T) resolvedConfigs.get(name);
     }
 
@@ -191,6 +197,9 @@ public class ParseContext implements Serializable {
      * @since Apache Tika 4.0
      */
     public void setResolvedConfig(String name, Object config) {
+        if (resolvedConfigs == null) {
+            resolvedConfigs = new HashMap<>();
+        }
         if (config != null) {
             resolvedConfigs.put(name, config);
         } else {
diff --git a/tika-pipes/tika-pipes-integration-tests/src/test/java/org/apache/tika/metadata/filter/AttachmentCountingListFilter.java b/tika-pipes/tika-pipes-integration-tests/src/test/java/org/apache/tika/metadata/filter/AttachmentCountingListFilter.java
new file mode 100644
index 000000000..fdf4f1c22
--- /dev/null
+++ b/tika-pipes/tika-pipes-integration-tests/src/test/java/org/apache/tika/metadata/filter/AttachmentCountingListFilter.java
@@ -0,0 +1,47 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.tika.metadata.filter;
+
+import java.util.List;
+
+import org.apache.tika.config.TikaComponent;
+import org.apache.tika.exception.TikaException;
+import org.apache.tika.metadata.Metadata;
+
+@TikaComponent
+public class AttachmentCountingListFilter extends MetadataFilter {
+
+    private Integer count = 0;
+
+    @Override
+    public List<Metadata> filter(List<Metadata> metadataList) throws TikaException {
+        if (metadataList == null || metadataList.isEmpty()) {
+            return metadataList;
+        }
+        metadataList.get(0).set("X-TIKA:attachment_count", Integer.toString(metadataList.size() - 1));
+        count += metadataList.size();
+        return metadataList;
+    }
+
+    public Integer getCount() {
+        return count;
+    }
+
+    public void setCount(Integer count) {
+        this.count = count;
+    }
+}
diff --git a/tika-pipes/tika-pipes-integration-tests/src/test/java/org/apache/tika/metadata/filter/MockUpperCaseFilter.java b/tika-pipes/tika-pipes-integration-tests/src/test/java/org/apache/tika/metadata/filter/MockUpperCaseFilter.java
new file mode 100644
index 000000000..a57b5dd7d
--- /dev/null
+++ b/tika-pipes/tika-pipes-integration-tests/src/test/java/org/apache/tika/metadata/filter/MockUpperCaseFilter.java
@@ -0,0 +1,40 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.tika.metadata.filter;
+
+import java.util.Locale;
+
+import org.apache.tika.config.TikaComponent;
+import org.apache.tika.metadata.Metadata;
+
+/**
+ * Mock Filter for testing uppercasing of all values
+ */
+@TikaComponent
+public class MockUpperCaseFilter extends MetadataFilterBase {
+
+    @Override
+    protected void filter(Metadata metadata) {
+        for (String n : metadata.names()) {
+            String[] vals = metadata.getValues(n);
+            metadata.remove(n);
+            for (String val : vals) {
+                metadata.add(n, val.toUpperCase(Locale.US));
+            }
+        }
+    }
+}
diff --git a/tika-server/tika-server-core/src/main/java/org/apache/tika/server/core/resource/TikaResource.java b/tika-server/tika-server-core/src/main/java/org/apache/tika/server/core/resource/TikaResource.java
index 71327764a..4729e66a4 100644
--- a/tika-server/tika-server-core/src/main/java/org/apache/tika/server/core/resource/TikaResource.java
+++ b/tika-server/tika-server-core/src/main/java/org/apache/tika/server/core/resource/TikaResource.java
@@ -62,6 +62,7 @@ import org.xml.sax.ContentHandler;
 import org.xml.sax.SAXException;
 
 import org.apache.tika.Tika;
+import org.apache.tika.config.JsonConfig;
 import org.apache.tika.config.TikaTaskTimeout;
 import org.apache.tika.config.loader.TikaLoader;
 import org.apache.tika.exception.EncryptedDocumentException;
@@ -78,8 +79,8 @@ import org.apache.tika.sax.BodyContentHandler;
 import org.apache.tika.sax.ExpandedTitleContentHandler;
 import org.apache.tika.sax.RichTextContentHandler;
 import org.apache.tika.sax.boilerpipe.BoilerpipeContentHandler;
-import org.apache.tika.serialization.serdes.ParseContextDeserializer;
 import org.apache.tika.serialization.ParseContextUtils;
+import org.apache.tika.serialization.serdes.ParseContextDeserializer;
 import org.apache.tika.server.core.InputStreamFactory;
 import org.apache.tika.server.core.ServerStatus;
 import org.apache.tika.server.core.TikaServerConfig;
@@ -148,8 +149,17 @@ public class TikaResource {
         ObjectMapper mapper = new ObjectMapper();
         JsonNode root = mapper.readTree(configJson);
         // Use root directly - the JSON should contain parser configs at the top level
-        ParseContext configuredContext = ParseContextDeserializer.readParseContext(root);
+        ParseContext configuredContext = ParseContextDeserializer.readParseContext(root, mapper);
+
+        // Copy jsonConfigs first (for SelfConfiguring parsers like PDFParser)
+        for (Map.Entry<String, JsonConfig> entry : configuredContext.getJsonConfigs().entrySet()) {
+            context.setJsonConfig(entry.getKey(), entry.getValue());
+        }
+
+        // Then resolve all configs to typed objects
         ParseContextUtils.resolveAll(configuredContext, Thread.currentThread().getContextClassLoader());
+
+        // Copy resolved typed objects from contextMap
         for (Map.Entry<String, Object> entry : configuredContext.getContextMap().entrySet()) {
             try {
                 Class<?> clazz = Class.forName(entry.getKey());
diff --git a/tika-server/tika-server-core/src/test/java/org/apache/tika/server/core/TikaServerPipesIntegrationTest.java b/tika-server/tika-server-core/src/test/java/org/apache/tika/server/core/TikaServerPipesIntegrationTest.java
index 5ba494c44..e7957c9df 100644
--- a/tika-server/tika-server-core/src/test/java/org/apache/tika/server/core/TikaServerPipesIntegrationTest.java
+++ b/tika-server/tika-server-core/src/test/java/org/apache/tika/server/core/TikaServerPipesIntegrationTest.java
@@ -222,7 +222,7 @@ public class TikaServerPipesIntegrationTest extends IntegrationTestBase {
     private String getJsonStringWithTimeout(String fileName, long timeoutMillis) throws IOException {
         ParseContext parseContext = new ParseContext();
         parseContext.set(HandlerConfig.class, DEFAULT_HANDLER_CONFIG);
-        parseContext.addConfig("tika-task-timeout", "{\"timeoutMillis\":" + timeoutMillis + "}");
+        parseContext.setJsonConfig("tika-task-timeout", "{\"timeoutMillis\":" + timeoutMillis + "}");
 
         FetchEmitTuple t = new FetchEmitTuple(fileName,
                 new FetchKey(CXFTestBase.FETCHER_ID, fileName),
diff --git a/tika-server/tika-server-standard/src/test/java/org/apache/tika/server/standard/RecursiveMetadataResourceTest.java b/tika-server/tika-server-standard/src/test/java/org/apache/tika/server/standard/RecursiveMetadataResourceTest.java
index 33aae76b8..8dcd90a29 100644
--- a/tika-server/tika-server-standard/src/test/java/org/apache/tika/server/standard/RecursiveMetadataResourceTest.java
+++ b/tika-server/tika-server-standard/src/test/java/org/apache/tika/server/standard/RecursiveMetadataResourceTest.java
@@ -118,9 +118,10 @@ public class RecursiveMetadataResourceTest extends CXFTestBase {
                 .get(0)
                 .getValues(TikaCoreProperties.TIKA_PARSED_BY);
         //make sure the CompressorParser doesn't show up here
-        assertEquals(2, parsedBy.length);
+        assertEquals(3, parsedBy.length);
         assertEquals("org.apache.tika.parser.CompositeParser", parsedBy[0]);
-        assertEquals("org.apache.tika.parser.microsoft.ooxml.OOXMLParser", parsedBy[1]);
+        assertEquals("org.apache.tika.parser.DefaultParser", parsedBy[1]);
+        assertEquals("org.apache.tika.parser.microsoft.ooxml.OOXMLParser", parsedBy[2]);
 
         //test that the rest is as it should be
         assertEquals(12, metadataList.size());
diff --git a/tika-server/tika-server-standard/src/test/java/org/apache/tika/server/standard/TikaParsersTest.java b/tika-server/tika-server-standard/src/test/java/org/apache/tika/server/standard/TikaParsersTest.java
index 64a5e090e..1eae7ba45 100644
--- a/tika-server/tika-server-standard/src/test/java/org/apache/tika/server/standard/TikaParsersTest.java
+++ b/tika-server/tika-server-standard/src/test/java/org/apache/tika/server/standard/TikaParsersTest.java
@@ -100,9 +100,9 @@ public class TikaParsersTest extends CXFTestBase {
             String text = getStringFromInputStream((InputStream) response.getEntity());
             assertContains("Composite", text);
 
-            assertContains("<h3>OpusParser", text);
-            assertContains("<h3>PackageParser", text);
-            assertContains("<h3>OOXMLParser", text);
+            assertContains("<h4>OpusParser", text);
+            assertContains("<h4>PackageParser", text);
+            assertContains("<h4>OOXMLParser", text);
 
             assertContains(OpusParser.class.getName(), text);
             assertContains(PackageParser.class.getName(), text);
@@ -145,7 +145,9 @@ public class TikaParsersTest extends CXFTestBase {
             assertEquals(Boolean.TRUE, json.get("composite"));
 
             // At least 20 child parsers which aren't composite, except for CompositeExternalParser
-            List<Object> children = (List) json.get("children");
+            List<Object> wrapper = (List) json.get("children");
+            Map<String, Object> firstItem = (Map) wrapper.get(0);
+            List<Object> children = (List) firstItem.get("children");
             assertTrue(children.size() >= 2);
             boolean hasOpus = false, hasOOXML = false, hasZip = false;
             int nonComposite = 0;
diff --git a/tika-server/tika-server-standard/src/test/java/org/apache/tika/server/standard/TikaPipesTest.java b/tika-server/tika-server-standard/src/test/java/org/apache/tika/server/standard/TikaPipesTest.java
index ac82d820a..3761fb0bf 100644
--- a/tika-server/tika-server-standard/src/test/java/org/apache/tika/server/standard/TikaPipesTest.java
+++ b/tika-server/tika-server-standard/src/test/java/org/apache/tika/server/standard/TikaPipesTest.java
@@ -182,7 +182,7 @@ public class TikaPipesTest extends CXFTestBase {
     public void testConcatenated() throws Exception {
         ParseContext parseContext = new ParseContext();
         // Use addConfig with JSON for handler-config
-        parseContext.addConfig("handler-config",
+        parseContext.setJsonConfig("handler-config",
                 "{\"type\": \"TEXT\", \"parseMode\": \"CONCATENATE\", \"writeLimit\": -1, \"maxEmbeddedResources\": -1, \"throwOnWriteLimitReached\": true}");
 
         FetchEmitTuple t = new FetchEmitTuple("myId", new FetchKey(FETCHER_ID, "test_recursive_embedded.docx"),
@@ -212,7 +212,7 @@ public class TikaPipesTest extends CXFTestBase {
     public void testPDFConfig() throws Exception {
         ParseContext parseContext = new ParseContext();
         // Configure PDFParser via JSON config (pdf-parser is self-configuring)
-        parseContext.addConfig("pdf-parser", "{\"sortByPosition\": true}");
+        parseContext.setJsonConfig("pdf-parser", "{\"sortByPosition\": true}");
 
         FetchEmitTuple t = new FetchEmitTuple("myId", new FetchKey(FETCHER_ID, TEST_TWO_BOXES_PDF),
                 new EmitKey(EMITTER_JSON_ID, ""), new Metadata(), parseContext);

Reply via email to