This is an automated email from the ASF dual-hosted git repository.

ndipiazza pushed a commit to branch TIKA-4851-revert
in repository https://gitbox.apache.org/repos/asf/tika.git

commit 44c6c56a4f020b1722b8d8ef6cd5c77d8fde42e3
Author: Nicholas DiPiazza <[email protected]>
AuthorDate: Fri Dec 19 13:38:24 2025 -0600

    Revert "Clean up jackson settings on metadata list serialization/deserialization (#2464)"
    
    This reverts commit fbf5f82527d95dba3c95b346d73d1bb4102c34d1.
---
 .../org/apache/tika/config/GlobalSettings.java     | 57 ++++++++++++++
 .../apache/tika/config/loader/TikaJsonConfig.java  |  2 +-
 .../org/apache/tika/config/loader/TikaLoader.java  | 60 +++------------
 .../apache/tika/serialization/JsonMetadata.java    | 89 +++++++++-------------
 .../tika/serialization/JsonMetadataList.java       | 74 ++++++------------
 .../test/resources/configs/tika-config-json.json   |  6 +-
 .../test/resources/configs/tika-config-json.json   |  4 +-
 7 files changed, 129 insertions(+), 163 deletions(-)

diff --git 
a/tika-serialization/src/main/java/org/apache/tika/config/GlobalSettings.java 
b/tika-serialization/src/main/java/org/apache/tika/config/GlobalSettings.java
index 7493000ae..7d07c3b9e 100644
--- 
a/tika-serialization/src/main/java/org/apache/tika/config/GlobalSettings.java
+++ 
b/tika-serialization/src/main/java/org/apache/tika/config/GlobalSettings.java
@@ -25,6 +25,7 @@ import com.fasterxml.jackson.annotation.JsonProperty;
  * <p>Example JSON:
  * <pre>
  * {
+ *   "maxJsonStringFieldLength": 50000000,
  *   "xml-reader-utils": {
  *     "maxEntityExpansions": 1000,
  *     "maxNumReuses": 100,
@@ -35,6 +36,20 @@ import com.fasterxml.jackson.annotation.JsonProperty;
  */
 public class GlobalSettings {
 
+    /**
+     * Static maximum length for JSON string fields.
+     * Default: 20,000,000 (Jackson's default)
+     * This is static because it's a global setting that affects all JSON 
parsing.
+     */
+    private static Integer maxJsonStringFieldLength = 20_000_000;
+
+    /**
+     * Instance field for deserialization from JSON.
+     * The value is copied to the static field when set.
+     */
+    @JsonProperty("maxJsonStringFieldLength")
+    private Integer instanceMaxJsonStringFieldLength = 20_000_000;
+
     /**
      * Service loader configuration for handling initialization problems.
      */
@@ -47,6 +62,48 @@ public class GlobalSettings {
     @JsonProperty("xml-reader-utils")
     private XmlReaderUtilsConfig xmlReaderUtils;
 
+    /**
+     * Gets the static maximum JSON string field length.
+     *
+     * @return the max length (defaults to 20,000,000); null only if null was passed to the setter
+     */
+    public static Integer getMaxJsonStringFieldLength() {
+        return maxJsonStringFieldLength;
+    }
+
+    /**
+     * Sets the static maximum JSON string field length.
+     * This affects all JSON parsing globally.
+     *
+     * @param length the max length to set
+     */
+    public static void setMaxJsonStringFieldLength(Integer length) {
+        maxJsonStringFieldLength = length;
+    }
+
+    /**
+     * Instance getter for deserialization.
+     * Returns the instance value which may differ from the static value.
+     *
+     * @return the instance max length
+     */
+    public Integer getInstanceMaxJsonStringFieldLength() {
+        return instanceMaxJsonStringFieldLength;
+    }
+
+    /**
+     * Instance setter for deserialization.
+     * Automatically updates the static field when set.
+     *
+     * @param length the max length to set
+     */
+    public void setInstanceMaxJsonStringFieldLength(Integer length) {
+        this.instanceMaxJsonStringFieldLength = length;
+        if (length != null) {
+            setMaxJsonStringFieldLength(length);
+        }
+    }
+
     public ServiceLoaderConfig getServiceLoader() {
         return serviceLoader;
     }
diff --git 
a/tika-serialization/src/main/java/org/apache/tika/config/loader/TikaJsonConfig.java
 
b/tika-serialization/src/main/java/org/apache/tika/config/loader/TikaJsonConfig.java
index 8ce14a30f..2eeb8bc7a 100644
--- 
a/tika-serialization/src/main/java/org/apache/tika/config/loader/TikaJsonConfig.java
+++ 
b/tika-serialization/src/main/java/org/apache/tika/config/loader/TikaJsonConfig.java
@@ -103,7 +103,7 @@ public class TikaJsonConfig {
      */
     private static final Set<String> KNOWN_KEYS = Set.of(
             // Globals
-            "metadata-list",
+            "maxJsonStringFieldLength",
             "service-loader",
             "xml-reader-utils",
             // Core Tika component keys
diff --git 
a/tika-serialization/src/main/java/org/apache/tika/config/loader/TikaLoader.java
 
b/tika-serialization/src/main/java/org/apache/tika/config/loader/TikaLoader.java
index b82beb7d8..52e17d9d1 100644
--- 
a/tika-serialization/src/main/java/org/apache/tika/config/loader/TikaLoader.java
+++ 
b/tika-serialization/src/main/java/org/apache/tika/config/loader/TikaLoader.java
@@ -21,8 +21,6 @@ import java.nio.file.Path;
 import java.util.Collections;
 import java.util.List;
 
-import com.fasterxml.jackson.core.StreamReadConstraints;
-import com.fasterxml.jackson.databind.JsonNode;
 import com.fasterxml.jackson.databind.ObjectMapper;
 
 import org.apache.tika.config.GlobalSettings;
@@ -41,8 +39,6 @@ import org.apache.tika.parser.CompositeParser;
 import org.apache.tika.parser.Parser;
 import org.apache.tika.renderer.CompositeRenderer;
 import org.apache.tika.renderer.Renderer;
-import org.apache.tika.serialization.JsonMetadata;
-import org.apache.tika.serialization.JsonMetadataList;
 
 /**
  * Main entry point for loading Tika components from JSON configuration.
@@ -400,18 +396,17 @@ public class TikaLoader {
      *
      * <p>Settings include:
      * <ul>
-     *   <li>metadata-list - Jackson StreamReadConstraints for 
JsonMetadata/JsonMetadataList serialization</li>
-     *   <li>service-loader - Service loader configuration</li>
+     *   <li>maxJsonStringFieldLength - Maximum JSON string field length 
(static, affects all JSON parsing)</li>
+     *   <li>service-loader.initializableProblemHandler - How to handle 
initialization problems</li>
      *   <li>xml-reader-utils - XML parser security settings</li>
      * </ul>
      *
      * <p>Example JSON:
      * <pre>
      * {
-     *   "metadata-list": {
-     *     "maxStringLength": 50000000,
-     *     "maxNestingDepth": 10,
-     *     "maxNumberLength": 500
+     *   "maxJsonStringFieldLength": 50000000,
+     *   "service-loader": {
+     *     "initializableProblemHandler": "ignore"
      *   },
      *   "xml-reader-utils": {
      *     "maxEntityExpansions": 1000,
@@ -428,8 +423,11 @@ public class TikaLoader {
         if (globalSettings == null) {
             globalSettings = new GlobalSettings();
 
-            // Load metadata-list config for JsonMetadata/JsonMetadataList 
serialization
-            loadMetadataListConfig();
+            // Load maxJsonStringFieldLength from top level and set it 
statically
+            if (config.getRootNode().has("maxJsonStringFieldLength")) {
+                GlobalSettings.setMaxJsonStringFieldLength(
+                        
config.getRootNode().get("maxJsonStringFieldLength").asInt());
+            }
 
             // Load service-loader config (official Tika config at root level)
             GlobalSettings.ServiceLoaderConfig serviceLoaderConfig =
@@ -448,44 +446,6 @@ public class TikaLoader {
         return globalSettings;
     }
 
-    /**
-     * Loads the metadata-list configuration section and applies it to
-     * JsonMetadata and JsonMetadataList serializers.
-     * <p>
-     * Configuration uses Jackson's StreamReadConstraints property names:
-     * <pre>
-     * {
-     *   "metadata-list": {
-     *     "maxStringLength": 20000000,
-     *     "maxNestingDepth": 10,
-     *     "maxNumberLength": 500
-     *   }
-     * }
-     * </pre>
-     */
-    private void loadMetadataListConfig() {
-        JsonNode metadataListNode = config.getRootNode().get("metadata-list");
-        if (metadataListNode == null) {
-            return;
-        }
-
-        StreamReadConstraints.Builder builder = 
StreamReadConstraints.builder();
-
-        if (metadataListNode.has("maxStringLength")) {
-            
builder.maxStringLength(metadataListNode.get("maxStringLength").asInt());
-        }
-        if (metadataListNode.has("maxNestingDepth")) {
-            
builder.maxNestingDepth(metadataListNode.get("maxNestingDepth").asInt());
-        }
-        if (metadataListNode.has("maxNumberLength")) {
-            
builder.maxNumberLength(metadataListNode.get("maxNumberLength").asInt());
-        }
-
-        StreamReadConstraints constraints = builder.build();
-        JsonMetadata.setStreamReadConstraints(constraints);
-        JsonMetadataList.setStreamReadConstraints(constraints);
-    }
-
     /**
      * Gets the global settings if they have been loaded.
      *
diff --git 
a/tika-serialization/src/main/java/org/apache/tika/serialization/JsonMetadata.java
 
b/tika-serialization/src/main/java/org/apache/tika/serialization/JsonMetadata.java
index 504fb4f19..e9adec234 100644
--- 
a/tika-serialization/src/main/java/org/apache/tika/serialization/JsonMetadata.java
+++ 
b/tika-serialization/src/main/java/org/apache/tika/serialization/JsonMetadata.java
@@ -26,69 +26,22 @@ import com.fasterxml.jackson.core.StreamReadConstraints;
 import com.fasterxml.jackson.databind.ObjectMapper;
 import com.fasterxml.jackson.databind.module.SimpleModule;
 
+import org.apache.tika.config.GlobalSettings;
 import org.apache.tika.metadata.Metadata;
 
 public class JsonMetadata {
 
     static volatile boolean PRETTY_PRINT = false;
 
-    /**
-     * Default stream read constraints for metadata serialization.
-     */
-    private static final StreamReadConstraints DEFAULT_CONSTRAINTS = 
StreamReadConstraints
-            .builder()
-            .maxNestingDepth(10)
-            .maxStringLength(20_000_000)
-            .maxNumberLength(500)
-            .build();
-
-    private static volatile StreamReadConstraints streamReadConstraints = 
DEFAULT_CONSTRAINTS;
-    private static volatile ObjectMapper OBJECT_MAPPER;
-    private static volatile ObjectMapper PRETTY_SERIALIZER;
+    private static ObjectMapper OBJECT_MAPPER;
+    private static final ObjectMapper PRETTY_SERIALIZER;
 
     static {
-        rebuildObjectMappers();
-    }
-
-    private static void rebuildObjectMappers() {
-        JsonFactory factory = new JsonFactory();
-        factory.setStreamReadConstraints(streamReadConstraints);
-
-        ObjectMapper mapper = new ObjectMapper(factory);
-        SimpleModule baseModule = new SimpleModule();
-        baseModule.addDeserializer(Metadata.class, new MetadataDeserializer());
-        baseModule.addSerializer(Metadata.class, new MetadataSerializer());
-        mapper.registerModule(baseModule);
-        OBJECT_MAPPER = mapper;
-
-        ObjectMapper prettyMapper = new ObjectMapper(factory);
+        OBJECT_MAPPER = 
buildObjectMapper(StreamReadConstraints.DEFAULT_MAX_STRING_LEN);
+        PRETTY_SERIALIZER = new ObjectMapper();
         SimpleModule prettySerializerModule = new SimpleModule();
         prettySerializerModule.addSerializer(Metadata.class, new 
MetadataSerializer(true));
-        prettyMapper.registerModule(prettySerializerModule);
-        PRETTY_SERIALIZER = prettyMapper;
-    }
-
-    /**
-     * Sets the stream read constraints for JSON parsing of metadata.
-     * This affects all subsequent calls to {@link #fromJson(Reader)}.
-     * <p>
-     * Typically called by TikaLoader during initialization based on the
-     * "metadata-list" configuration section.
-     *
-     * @param constraints the constraints to use
-     */
-    public static synchronized void 
setStreamReadConstraints(StreamReadConstraints constraints) {
-        streamReadConstraints = constraints;
-        rebuildObjectMappers();
-    }
-
-    /**
-     * Gets the current stream read constraints.
-     *
-     * @return the current constraints
-     */
-    public static StreamReadConstraints getStreamReadConstraints() {
-        return streamReadConstraints;
+        PRETTY_SERIALIZER.registerModule(prettySerializerModule);
     }
 
     /**
@@ -109,20 +62,46 @@ public class JsonMetadata {
     }
 
     /**
-     * Read metadata from reader. This does not close the reader.
+     * Read metadata from reader.
+     * <p>
+     * This does not close the reader.
+     * <p>
+     * This rebuilds the OBJECT_MAPPER if its max string length differs from 
GlobalSettings.getMaxJsonStringFieldLength().
      *
      * @param reader reader to read from
-     * @return Metadata or null if reader is null
+     * @return Metadata, or null if reader is null
      * @throws IOException in case of parse failure or IO failure with Reader
      */
     public static Metadata fromJson(Reader reader) throws IOException {
         if (reader == null) {
             return null;
         }
+        if (OBJECT_MAPPER
+                .getFactory()
+                .streamReadConstraints()
+                .getMaxStringLength() != 
GlobalSettings.getMaxJsonStringFieldLength()) {
+            OBJECT_MAPPER = 
buildObjectMapper(GlobalSettings.getMaxJsonStringFieldLength());
+        }
         return OBJECT_MAPPER.readValue(reader, Metadata.class);
     }
 
     public static void setPrettyPrinting(boolean prettyPrint) {
         PRETTY_PRINT = prettyPrint;
     }
+
+    static ObjectMapper buildObjectMapper(int maxStringLen) {
+        JsonFactory factory = new JsonFactory();
+        factory.setStreamReadConstraints(StreamReadConstraints
+                .builder()
+                .maxNestingDepth(10)
+                .maxStringLength(maxStringLen)
+                .maxNumberLength(500)
+                .build());
+        ObjectMapper objectMapper = new ObjectMapper(factory);
+        SimpleModule baseModule = new SimpleModule();
+        baseModule.addDeserializer(Metadata.class, new MetadataDeserializer());
+        baseModule.addSerializer(Metadata.class, new MetadataSerializer());
+        objectMapper.registerModule(baseModule);
+        return objectMapper;
+    }
 }
diff --git 
a/tika-serialization/src/main/java/org/apache/tika/serialization/JsonMetadataList.java
 
b/tika-serialization/src/main/java/org/apache/tika/serialization/JsonMetadataList.java
index 7611cdfea..71427947b 100644
--- 
a/tika-serialization/src/main/java/org/apache/tika/serialization/JsonMetadataList.java
+++ 
b/tika-serialization/src/main/java/org/apache/tika/serialization/JsonMetadataList.java
@@ -16,6 +16,8 @@
  */
 package org.apache.tika.serialization;
 
+import static org.apache.tika.serialization.JsonMetadata.buildObjectMapper;
+
 import java.io.IOException;
 import java.io.Reader;
 import java.io.Writer;
@@ -27,69 +29,36 @@ import com.fasterxml.jackson.core.type.TypeReference;
 import com.fasterxml.jackson.databind.ObjectMapper;
 import com.fasterxml.jackson.databind.module.SimpleModule;
 
+import org.apache.tika.config.GlobalSettings;
 import org.apache.tika.metadata.Metadata;
 
 public class JsonMetadataList {
 
     static volatile boolean PRETTY_PRINT = false;
 
-    /**
-     * Default stream read constraints for metadata list serialization.
-     */
-    private static final StreamReadConstraints DEFAULT_CONSTRAINTS = 
StreamReadConstraints
-            .builder()
-            .maxNestingDepth(10)
-            .maxStringLength(20_000_000)
-            .maxNumberLength(500)
-            .build();
-
-    private static volatile StreamReadConstraints streamReadConstraints = 
DEFAULT_CONSTRAINTS;
-    private static volatile ObjectMapper OBJECT_MAPPER;
-    private static volatile ObjectMapper PRETTY_SERIALIZER;
+    private static ObjectMapper OBJECT_MAPPER;
+    private static final ObjectMapper PRETTY_SERIALIZER;
 
     static {
-        rebuildObjectMappers();
-    }
-
-    private static void rebuildObjectMappers() {
         JsonFactory factory = new JsonFactory();
-        factory.setStreamReadConstraints(streamReadConstraints);
-
-        ObjectMapper mapper = new ObjectMapper(factory);
+        factory.setStreamReadConstraints(StreamReadConstraints
+                .builder()
+                .maxNestingDepth(10)
+                .maxStringLength(GlobalSettings.getMaxJsonStringFieldLength())
+                .maxNumberLength(500)
+//                                                              
.maxDocumentLength(1000000)
+                .build());
+        OBJECT_MAPPER = new ObjectMapper(factory);
         SimpleModule baseModule = new SimpleModule();
         baseModule.addDeserializer(Metadata.class, new MetadataDeserializer());
         baseModule.addSerializer(Metadata.class, new MetadataSerializer());
-        mapper.registerModule(baseModule);
-        OBJECT_MAPPER = mapper;
+        OBJECT_MAPPER.registerModule(baseModule);
 
-        ObjectMapper prettyMapper = new ObjectMapper(factory);
+        PRETTY_SERIALIZER = new ObjectMapper(factory);
         SimpleModule prettySerializerModule = new SimpleModule();
         prettySerializerModule.addSerializer(Metadata.class, new 
MetadataSerializer(true));
-        prettyMapper.registerModule(prettySerializerModule);
-        PRETTY_SERIALIZER = prettyMapper;
-    }
-
-    /**
-     * Sets the stream read constraints for JSON parsing of metadata lists.
-     * This affects all subsequent calls to {@link #fromJson(Reader)}.
-     * <p>
-     * Typically called by TikaLoader during initialization based on the
-     * "metadata-list" configuration section.
-     *
-     * @param constraints the constraints to use
-     */
-    public static synchronized void 
setStreamReadConstraints(StreamReadConstraints constraints) {
-        streamReadConstraints = constraints;
-        rebuildObjectMappers();
-    }
+        PRETTY_SERIALIZER.registerModule(prettySerializerModule);
 
-    /**
-     * Gets the current stream read constraints.
-     *
-     * @return the current constraints
-     */
-    public static StreamReadConstraints getStreamReadConstraints() {
-        return streamReadConstraints;
     }
 
     /**
@@ -120,16 +89,21 @@ public class JsonMetadataList {
     }
 
     /**
-     * Read metadata from reader. This does not close the reader.
+     * Read metadata from reader. This does not close the reader.
      *
-     * @param reader the reader to read from
-     * @return Metadata list or null if reader is null
+     * @param reader the reader to read from
+     * @return list of Metadata, or null if reader is null
      * @throws IOException in case of parse failure or IO failure with Reader
      */
     public static List<Metadata> fromJson(Reader reader) throws IOException {
         if (reader == null) {
             return null;
         }
+        if 
(OBJECT_MAPPER.getFactory().streamReadConstraints().getMaxStringLength()
+                != GlobalSettings.getMaxJsonStringFieldLength()) {
+            OBJECT_MAPPER = 
buildObjectMapper(GlobalSettings.getMaxJsonStringFieldLength());
+        }
+
         return OBJECT_MAPPER.readValue(reader, new 
TypeReference<List<Metadata>>(){});
     }
 
diff --git 
a/tika-serialization/src/test/resources/configs/tika-config-json.json 
b/tika-serialization/src/test/resources/configs/tika-config-json.json
index 3650aab64..8d1e5feb0 100644
--- a/tika-serialization/src/test/resources/configs/tika-config-json.json
+++ b/tika-serialization/src/test/resources/configs/tika-config-json.json
@@ -1,5 +1,3 @@
 {
-  "metadata-list": {
-    "maxStringLength": 50000000
-  }
-}
+  "maxJsonStringFieldLength": 50000000
+}
\ No newline at end of file
diff --git 
a/tika-server/tika-server-standard/src/test/resources/configs/tika-config-json.json
 
b/tika-server/tika-server-standard/src/test/resources/configs/tika-config-json.json
index 3650aab64..419a225e6 100644
--- 
a/tika-server/tika-server-standard/src/test/resources/configs/tika-config-json.json
+++ 
b/tika-server/tika-server-standard/src/test/resources/configs/tika-config-json.json
@@ -1,5 +1,3 @@
 {
-  "metadata-list": {
-    "maxStringLength": 50000000
-  }
+  "maxJsonStringFieldLength": 50000000
 }

Reply via email to