This is an automated email from the ASF dual-hosted git repository.
tallison pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/tika.git
The following commit(s) were added to refs/heads/main by this push:
new fbf5f8252 Clean up jackson settings on metadata list serialization/deserialization (#2464)
fbf5f8252 is described below
commit fbf5f82527d95dba3c95b346d73d1bb4102c34d1
Author: Tim Allison <[email protected]>
AuthorDate: Wed Dec 17 11:09:24 2025 -0500
Clean up jackson settings on metadata list serialization/deserialization (#2464)
---
.../org/apache/tika/config/GlobalSettings.java | 57 --------------
.../apache/tika/config/loader/TikaJsonConfig.java | 2 +-
.../org/apache/tika/config/loader/TikaLoader.java | 60 ++++++++++++---
.../apache/tika/serialization/JsonMetadata.java | 89 +++++++++++++---------
.../tika/serialization/JsonMetadataList.java | 74 ++++++++++++------
.../test/resources/configs/tika-config-json.json | 6 +-
.../test/resources/configs/tika-config-json.json | 4 +-
7 files changed, 163 insertions(+), 129 deletions(-)
diff --git a/tika-serialization/src/main/java/org/apache/tika/config/GlobalSettings.java b/tika-serialization/src/main/java/org/apache/tika/config/GlobalSettings.java
index 7d07c3b9e..7493000ae 100644
--- a/tika-serialization/src/main/java/org/apache/tika/config/GlobalSettings.java
+++ b/tika-serialization/src/main/java/org/apache/tika/config/GlobalSettings.java
@@ -25,7 +25,6 @@ import com.fasterxml.jackson.annotation.JsonProperty;
* <p>Example JSON:
* <pre>
* {
- * "maxJsonStringFieldLength": 50000000,
* "xml-reader-utils": {
* "maxEntityExpansions": 1000,
* "maxNumReuses": 100,
@@ -36,20 +35,6 @@ import com.fasterxml.jackson.annotation.JsonProperty;
*/
public class GlobalSettings {
- /**
- * Static maximum length for JSON string fields.
- * Default: 20,000,000 (Jackson's default)
- * This is static because it's a global setting that affects all JSON parsing.
- */
- private static Integer maxJsonStringFieldLength = 20_000_000;
-
- /**
- * Instance field for deserialization from JSON.
- * The value is copied to the static field when set.
- */
- @JsonProperty("maxJsonStringFieldLength")
- private Integer instanceMaxJsonStringFieldLength = 20_000_000;
-
/**
* Service loader configuration for handling initialization problems.
*/
@@ -62,48 +47,6 @@ public class GlobalSettings {
@JsonProperty("xml-reader-utils")
private XmlReaderUtilsConfig xmlReaderUtils;
- /**
- * Gets the static maximum JSON string field length.
- *
- * @return the max length, or null if not set
- */
- public static Integer getMaxJsonStringFieldLength() {
- return maxJsonStringFieldLength;
- }
-
- /**
- * Sets the static maximum JSON string field length.
- * This affects all JSON parsing globally.
- *
- * @param length the max length to set
- */
- public static void setMaxJsonStringFieldLength(Integer length) {
- maxJsonStringFieldLength = length;
- }
-
- /**
- * Instance getter for deserialization.
- * Returns the instance value which may differ from the static value.
- *
- * @return the instance max length
- */
- public Integer getInstanceMaxJsonStringFieldLength() {
- return instanceMaxJsonStringFieldLength;
- }
-
- /**
- * Instance setter for deserialization.
- * Automatically updates the static field when set.
- *
- * @param length the max length to set
- */
- public void setInstanceMaxJsonStringFieldLength(Integer length) {
- this.instanceMaxJsonStringFieldLength = length;
- if (length != null) {
- setMaxJsonStringFieldLength(length);
- }
- }
-
public ServiceLoaderConfig getServiceLoader() {
return serviceLoader;
}
diff --git a/tika-serialization/src/main/java/org/apache/tika/config/loader/TikaJsonConfig.java b/tika-serialization/src/main/java/org/apache/tika/config/loader/TikaJsonConfig.java
index 2eeb8bc7a..8ce14a30f 100644
--- a/tika-serialization/src/main/java/org/apache/tika/config/loader/TikaJsonConfig.java
+++ b/tika-serialization/src/main/java/org/apache/tika/config/loader/TikaJsonConfig.java
@@ -103,7 +103,7 @@ public class TikaJsonConfig {
*/
private static final Set<String> KNOWN_KEYS = Set.of(
// Globals
- "maxJsonStringFieldLength",
+ "metadata-list",
"service-loader",
"xml-reader-utils",
// Core Tika component keys
diff --git a/tika-serialization/src/main/java/org/apache/tika/config/loader/TikaLoader.java b/tika-serialization/src/main/java/org/apache/tika/config/loader/TikaLoader.java
index 52e17d9d1..b82beb7d8 100644
--- a/tika-serialization/src/main/java/org/apache/tika/config/loader/TikaLoader.java
+++ b/tika-serialization/src/main/java/org/apache/tika/config/loader/TikaLoader.java
@@ -21,6 +21,8 @@ import java.nio.file.Path;
import java.util.Collections;
import java.util.List;
+import com.fasterxml.jackson.core.StreamReadConstraints;
+import com.fasterxml.jackson.databind.JsonNode;
import com.fasterxml.jackson.databind.ObjectMapper;
import org.apache.tika.config.GlobalSettings;
@@ -39,6 +41,8 @@ import org.apache.tika.parser.CompositeParser;
import org.apache.tika.parser.Parser;
import org.apache.tika.renderer.CompositeRenderer;
import org.apache.tika.renderer.Renderer;
+import org.apache.tika.serialization.JsonMetadata;
+import org.apache.tika.serialization.JsonMetadataList;
/**
* Main entry point for loading Tika components from JSON configuration.
@@ -396,17 +400,18 @@ public class TikaLoader {
*
* <p>Settings include:
* <ul>
- * <li>maxJsonStringFieldLength - Maximum JSON string field length (static, affects all JSON parsing)</li>
- * <li>service-loader.initializableProblemHandler - How to handle initialization problems</li>
+ * <li>metadata-list - Jackson StreamReadConstraints for JsonMetadata/JsonMetadataList serialization</li>
+ * <li>service-loader - Service loader configuration</li>
* <li>xml-reader-utils - XML parser security settings</li>
* </ul>
*
* <p>Example JSON:
* <pre>
* {
- * "maxJsonStringFieldLength": 50000000,
- * "service-loader": {
- * "initializableProblemHandler": "ignore"
+ * "metadata-list": {
+ * "maxStringLength": 50000000,
+ * "maxNestingDepth": 10,
+ * "maxNumberLength": 500
* },
* "xml-reader-utils": {
* "maxEntityExpansions": 1000,
@@ -423,11 +428,8 @@ public class TikaLoader {
if (globalSettings == null) {
globalSettings = new GlobalSettings();
- // Load maxJsonStringFieldLength from top level and set it statically
- if (config.getRootNode().has("maxJsonStringFieldLength")) {
- GlobalSettings.setMaxJsonStringFieldLength(
- config.getRootNode().get("maxJsonStringFieldLength").asInt());
- }
+ // Load metadata-list config for JsonMetadata/JsonMetadataList serialization
+ loadMetadataListConfig();
// Load service-loader config (official Tika config at root level)
GlobalSettings.ServiceLoaderConfig serviceLoaderConfig =
@@ -446,6 +448,44 @@ public class TikaLoader {
return globalSettings;
}
+ /**
+ * Loads the metadata-list configuration section and applies it to
+ * JsonMetadata and JsonMetadataList serializers.
+ * <p>
+ * Configuration uses Jackson's StreamReadConstraints property names:
+ * <pre>
+ * {
+ * "metadata-list": {
+ * "maxStringLength": 20000000,
+ * "maxNestingDepth": 10,
+ * "maxNumberLength": 500
+ * }
+ * }
+ * </pre>
+ */
+ private void loadMetadataListConfig() {
+ JsonNode metadataListNode = config.getRootNode().get("metadata-list");
+ if (metadataListNode == null) {
+ return;
+ }
+
+ StreamReadConstraints.Builder builder = StreamReadConstraints.builder();
+
+ if (metadataListNode.has("maxStringLength")) {
+ builder.maxStringLength(metadataListNode.get("maxStringLength").asInt());
+ }
+ if (metadataListNode.has("maxNestingDepth")) {
+ builder.maxNestingDepth(metadataListNode.get("maxNestingDepth").asInt());
+ }
+ if (metadataListNode.has("maxNumberLength")) {
+ builder.maxNumberLength(metadataListNode.get("maxNumberLength").asInt());
+ }
+
+ StreamReadConstraints constraints = builder.build();
+ JsonMetadata.setStreamReadConstraints(constraints);
+ JsonMetadataList.setStreamReadConstraints(constraints);
+ }
+
/**
* Gets the global settings if they have been loaded.
*
diff --git a/tika-serialization/src/main/java/org/apache/tika/serialization/JsonMetadata.java b/tika-serialization/src/main/java/org/apache/tika/serialization/JsonMetadata.java
index e9adec234..504fb4f19 100644
--- a/tika-serialization/src/main/java/org/apache/tika/serialization/JsonMetadata.java
+++ b/tika-serialization/src/main/java/org/apache/tika/serialization/JsonMetadata.java
@@ -26,22 +26,69 @@ import com.fasterxml.jackson.core.StreamReadConstraints;
import com.fasterxml.jackson.databind.ObjectMapper;
import com.fasterxml.jackson.databind.module.SimpleModule;
-import org.apache.tika.config.GlobalSettings;
import org.apache.tika.metadata.Metadata;
public class JsonMetadata {
static volatile boolean PRETTY_PRINT = false;
- private static ObjectMapper OBJECT_MAPPER;
- private static final ObjectMapper PRETTY_SERIALIZER;
+ /**
+ * Default stream read constraints for metadata serialization.
+ */
+ private static final StreamReadConstraints DEFAULT_CONSTRAINTS = StreamReadConstraints
+ .builder()
+ .maxNestingDepth(10)
+ .maxStringLength(20_000_000)
+ .maxNumberLength(500)
+ .build();
+
+ private static volatile StreamReadConstraints streamReadConstraints = DEFAULT_CONSTRAINTS;
+ private static volatile ObjectMapper OBJECT_MAPPER;
+ private static volatile ObjectMapper PRETTY_SERIALIZER;
static {
- OBJECT_MAPPER = buildObjectMapper(StreamReadConstraints.DEFAULT_MAX_STRING_LEN);
- PRETTY_SERIALIZER = new ObjectMapper();
+ rebuildObjectMappers();
+ }
+
+ private static void rebuildObjectMappers() {
+ JsonFactory factory = new JsonFactory();
+ factory.setStreamReadConstraints(streamReadConstraints);
+
+ ObjectMapper mapper = new ObjectMapper(factory);
+ SimpleModule baseModule = new SimpleModule();
+ baseModule.addDeserializer(Metadata.class, new MetadataDeserializer());
+ baseModule.addSerializer(Metadata.class, new MetadataSerializer());
+ mapper.registerModule(baseModule);
+ OBJECT_MAPPER = mapper;
+
+ ObjectMapper prettyMapper = new ObjectMapper(factory);
SimpleModule prettySerializerModule = new SimpleModule();
prettySerializerModule.addSerializer(Metadata.class, new MetadataSerializer(true));
- PRETTY_SERIALIZER.registerModule(prettySerializerModule);
+ prettyMapper.registerModule(prettySerializerModule);
+ PRETTY_SERIALIZER = prettyMapper;
+ }
+
+ /**
+ * Sets the stream read constraints for JSON parsing of metadata.
+ * This affects all subsequent calls to {@link #fromJson(Reader)}.
+ * <p>
+ * Typically called by TikaLoader during initialization based on the
+ * "metadata-list" configuration section.
+ *
+ * @param constraints the constraints to use
+ */
+ public static synchronized void setStreamReadConstraints(StreamReadConstraints constraints) {
+ streamReadConstraints = constraints;
+ rebuildObjectMappers();
+ }
+
+ /**
+ * Gets the current stream read constraints.
+ *
+ * @return the current constraints
+ */
+ public static StreamReadConstraints getStreamReadConstraints() {
+ return streamReadConstraints;
}
/**
@@ -62,46 +109,20 @@ public class JsonMetadata {
}
/**
- * Read metadata from reader.
- * <p>
- * This does not close the reader.
- * <p>
- * This will reset the OBJECT_MAPPER if the max string length differs from that in TikaConfig.
+ * Read metadata from reader. This does not close the reader.
*
* @param reader reader to read from
- * @return Metadata or null if nothing could be read from the reader
+ * @return Metadata or null if reader is null
* @throws IOException in case of parse failure or IO failure with Reader
*/
public static Metadata fromJson(Reader reader) throws IOException {
if (reader == null) {
return null;
}
- if (OBJECT_MAPPER
- .getFactory()
- .streamReadConstraints()
- .getMaxStringLength() != GlobalSettings.getMaxJsonStringFieldLength()) {
- OBJECT_MAPPER = buildObjectMapper(GlobalSettings.getMaxJsonStringFieldLength());
- }
return OBJECT_MAPPER.readValue(reader, Metadata.class);
}
public static void setPrettyPrinting(boolean prettyPrint) {
PRETTY_PRINT = prettyPrint;
}
-
- static ObjectMapper buildObjectMapper(int maxStringLen) {
- JsonFactory factory = new JsonFactory();
- factory.setStreamReadConstraints(StreamReadConstraints
- .builder()
- .maxNestingDepth(10)
- .maxStringLength(maxStringLen)
- .maxNumberLength(500)
- .build());
- ObjectMapper objectMapper = new ObjectMapper(factory);
- SimpleModule baseModule = new SimpleModule();
- baseModule.addDeserializer(Metadata.class, new MetadataDeserializer());
- baseModule.addSerializer(Metadata.class, new MetadataSerializer());
- objectMapper.registerModule(baseModule);
- return objectMapper;
- }
}
diff --git a/tika-serialization/src/main/java/org/apache/tika/serialization/JsonMetadataList.java b/tika-serialization/src/main/java/org/apache/tika/serialization/JsonMetadataList.java
index 71427947b..7611cdfea 100644
--- a/tika-serialization/src/main/java/org/apache/tika/serialization/JsonMetadataList.java
+++ b/tika-serialization/src/main/java/org/apache/tika/serialization/JsonMetadataList.java
@@ -16,8 +16,6 @@
*/
package org.apache.tika.serialization;
-import static org.apache.tika.serialization.JsonMetadata.buildObjectMapper;
-
import java.io.IOException;
import java.io.Reader;
import java.io.Writer;
@@ -29,36 +27,69 @@ import com.fasterxml.jackson.core.type.TypeReference;
import com.fasterxml.jackson.databind.ObjectMapper;
import com.fasterxml.jackson.databind.module.SimpleModule;
-import org.apache.tika.config.GlobalSettings;
import org.apache.tika.metadata.Metadata;
public class JsonMetadataList {
static volatile boolean PRETTY_PRINT = false;
- private static ObjectMapper OBJECT_MAPPER;
- private static final ObjectMapper PRETTY_SERIALIZER;
+ /**
+ * Default stream read constraints for metadata list serialization.
+ */
+ private static final StreamReadConstraints DEFAULT_CONSTRAINTS = StreamReadConstraints
+ .builder()
+ .maxNestingDepth(10)
+ .maxStringLength(20_000_000)
+ .maxNumberLength(500)
+ .build();
+
+ private static volatile StreamReadConstraints streamReadConstraints = DEFAULT_CONSTRAINTS;
+ private static volatile ObjectMapper OBJECT_MAPPER;
+ private static volatile ObjectMapper PRETTY_SERIALIZER;
static {
+ rebuildObjectMappers();
+ }
+
+ private static void rebuildObjectMappers() {
JsonFactory factory = new JsonFactory();
- factory.setStreamReadConstraints(StreamReadConstraints
- .builder()
- .maxNestingDepth(10)
- .maxStringLength(GlobalSettings.getMaxJsonStringFieldLength())
- .maxNumberLength(500)
-// .maxDocumentLength(1000000)
- .build());
- OBJECT_MAPPER = new ObjectMapper(factory);
+ factory.setStreamReadConstraints(streamReadConstraints);
+
+ ObjectMapper mapper = new ObjectMapper(factory);
SimpleModule baseModule = new SimpleModule();
baseModule.addDeserializer(Metadata.class, new MetadataDeserializer());
baseModule.addSerializer(Metadata.class, new MetadataSerializer());
- OBJECT_MAPPER.registerModule(baseModule);
+ mapper.registerModule(baseModule);
+ OBJECT_MAPPER = mapper;
- PRETTY_SERIALIZER = new ObjectMapper(factory);
+ ObjectMapper prettyMapper = new ObjectMapper(factory);
SimpleModule prettySerializerModule = new SimpleModule();
prettySerializerModule.addSerializer(Metadata.class, new MetadataSerializer(true));
- PRETTY_SERIALIZER.registerModule(prettySerializerModule);
+ prettyMapper.registerModule(prettySerializerModule);
+ PRETTY_SERIALIZER = prettyMapper;
+ }
+
+ /**
+ * Sets the stream read constraints for JSON parsing of metadata lists.
+ * This affects all subsequent calls to {@link #fromJson(Reader)}.
+ * <p>
+ * Typically called by TikaLoader during initialization based on the
+ * "metadata-list" configuration section.
+ *
+ * @param constraints the constraints to use
+ */
+ public static synchronized void setStreamReadConstraints(StreamReadConstraints constraints) {
+ streamReadConstraints = constraints;
+ rebuildObjectMappers();
+ }
+ /**
+ * Gets the current stream read constraints.
+ *
+ * @return the current constraints
+ */
+ public static StreamReadConstraints getStreamReadConstraints() {
+ return streamReadConstraints;
}
/**
@@ -89,21 +120,16 @@ public class JsonMetadataList {
}
/**
- * Read metadata from reader. This does not close the reader
+ * Read metadata from reader. This does not close the reader.
*
- * @param reader
- * @return Metadata or null if nothing could be read from the reader
+ * @param reader the reader to read from
+ * @return Metadata list or null if reader is null
* @throws IOException in case of parse failure or IO failure with Reader
*/
public static List<Metadata> fromJson(Reader reader) throws IOException {
if (reader == null) {
return null;
}
- if (OBJECT_MAPPER.getFactory().streamReadConstraints().getMaxStringLength()
- != GlobalSettings.getMaxJsonStringFieldLength()) {
- OBJECT_MAPPER = buildObjectMapper(GlobalSettings.getMaxJsonStringFieldLength());
- }
-
return OBJECT_MAPPER.readValue(reader, new TypeReference<List<Metadata>>(){});
}
diff --git
a/tika-serialization/src/test/resources/configs/tika-config-json.json
b/tika-serialization/src/test/resources/configs/tika-config-json.json
index 8d1e5feb0..3650aab64 100644
--- a/tika-serialization/src/test/resources/configs/tika-config-json.json
+++ b/tika-serialization/src/test/resources/configs/tika-config-json.json
@@ -1,3 +1,5 @@
{
- "maxJsonStringFieldLength": 50000000
-}
\ No newline at end of file
+ "metadata-list": {
+ "maxStringLength": 50000000
+ }
+}
diff --git a/tika-server/tika-server-standard/src/test/resources/configs/tika-config-json.json b/tika-server/tika-server-standard/src/test/resources/configs/tika-config-json.json
index 419a225e6..3650aab64 100644
--- a/tika-server/tika-server-standard/src/test/resources/configs/tika-config-json.json
+++ b/tika-server/tika-server-standard/src/test/resources/configs/tika-config-json.json
@@ -1,3 +1,5 @@
{
- "maxJsonStringFieldLength": 50000000
+ "metadata-list": {
+ "maxStringLength": 50000000
+ }
}