This is an automated email from the ASF dual-hosted git repository.
tallison pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/tika.git
The following commit(s) were added to refs/heads/main by this push:
new d66f9c8a7 TIKA-4581 metadata list serialization again (#2466)
d66f9c8a7 is described below
commit d66f9c8a7438e3a4f2a094995b2319d6b3636ec6
Author: Tim Allison <[email protected]>
AuthorDate: Wed Dec 17 14:52:42 2025 -0500
TIKA-4581 metadata list serialization again (#2466)
---
.../src/main/resources/META-INF/tika/other-configs.idx | 15 +++++++++++++++
.../org/apache/tika/config/loader/ComponentRegistry.java | 11 ++++++-----
.../metadata/filter/AttachmentCountingListFilter.java | 11 +++++++++++
.../tika/serialization/TestParseContextSerialization.java | 10 ++++++++--
4 files changed, 40 insertions(+), 7 deletions(-)
diff --git a/tika-pipes/tika-pipes-api/src/main/resources/META-INF/tika/other-configs.idx b/tika-pipes/tika-pipes-api/src/main/resources/META-INF/tika/other-configs.idx
index 0c9f7d254..4b6648279 100644
--- a/tika-pipes/tika-pipes-api/src/main/resources/META-INF/tika/other-configs.idx
+++ b/tika-pipes/tika-pipes-api/src/main/resources/META-INF/tika/other-configs.idx
@@ -1,3 +1,18 @@
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
# Component registry for tika-pipes-api
# Format: friendly-name=fully.qualified.ClassName
# this has to be manually generated for now because of the dependency graph
diff --git a/tika-serialization/src/main/java/org/apache/tika/config/loader/ComponentRegistry.java b/tika-serialization/src/main/java/org/apache/tika/config/loader/ComponentRegistry.java
index 700d93761..1ab7014ed 100644
--- a/tika-serialization/src/main/java/org/apache/tika/config/loader/ComponentRegistry.java
+++ b/tika-serialization/src/main/java/org/apache/tika/config/loader/ComponentRegistry.java
@@ -49,7 +49,7 @@ import org.apache.tika.exception.TikaConfigException;
public class ComponentRegistry {
private final Map<String, ComponentInfo> components;
- private final Map<Class<?>, String> classToName; // Reverse lookup
+ private final Map<String, String> classNameToFriendlyName; // Reverse lookup by class name
private final ClassLoader classLoader;
/**
@@ -64,10 +64,10 @@ public class ComponentRegistry {
throws TikaConfigException {
this.classLoader = classLoader;
this.components = loadComponents(indexFileName);
- // Build reverse lookup
- this.classToName = new HashMap<>();
+ // Build reverse lookup by class name (not Class object) to handle classloader differences
+ this.classNameToFriendlyName = new HashMap<>();
for (Map.Entry<String, ComponentInfo> entry : components.entrySet()) {
- classToName.put(entry.getValue().componentClass(), entry.getKey());
+ classNameToFriendlyName.put(entry.getValue().componentClass().getName(), entry.getKey());
}
}
@@ -120,12 +120,13 @@ public class ComponentRegistry {
/**
* Looks up a component's friendly name by its class.
+ * Uses class name (not Class object) for lookup to handle classloader differences.
*
* @param clazz the component class
* @return the friendly name, or null if not registered
*/
public String getFriendlyName(Class<?> clazz) {
- return classToName.get(clazz);
+ return classNameToFriendlyName.get(clazz.getName());
}
private Map<String, ComponentInfo> loadComponents(String indexFileName)
diff --git a/tika-serialization/src/test/java/org/apache/tika/metadata/filter/AttachmentCountingListFilter.java b/tika-serialization/src/test/java/org/apache/tika/metadata/filter/AttachmentCountingListFilter.java
index ffd73db11..e33390a7c 100644
--- a/tika-serialization/src/test/java/org/apache/tika/metadata/filter/AttachmentCountingListFilter.java
+++ b/tika-serialization/src/test/java/org/apache/tika/metadata/filter/AttachmentCountingListFilter.java
@@ -24,12 +24,23 @@ import org.apache.tika.metadata.Metadata;
@TikaComponent
public class AttachmentCountingListFilter extends MetadataFilter {
+
+ private Integer count = 0;
@Override
public List<Metadata> filter(List<Metadata> metadataList) throws TikaException {
if (metadataList == null || metadataList.isEmpty()) {
return metadataList;
}
metadataList.get(0).set("X-TIKA:attachment_count",
Integer.toString(metadataList.size() - 1));
+ count += metadataList.size();
return metadataList;
}
+
+ public Integer getCount() {
+ return count;
+ }
+
+ public void setCount(Integer count) {
+ this.count = count;
+ }
}
diff --git a/tika-serialization/src/test/java/org/apache/tika/serialization/TestParseContextSerialization.java b/tika-serialization/src/test/java/org/apache/tika/serialization/TestParseContextSerialization.java
index 5292ece26..4a300830c 100644
--- a/tika-serialization/src/test/java/org/apache/tika/serialization/TestParseContextSerialization.java
+++ b/tika-serialization/src/test/java/org/apache/tika/serialization/TestParseContextSerialization.java
@@ -24,6 +24,7 @@ import static org.junit.jupiter.api.Assertions.assertTrue;
import java.io.StringWriter;
import java.io.Writer;
import java.util.List;
+import java.util.Set;
import com.fasterxml.jackson.core.JsonGenerator;
import com.fasterxml.jackson.databind.JsonNode;
@@ -38,6 +39,7 @@ import org.apache.tika.extractor.DocumentSelector;
import org.apache.tika.extractor.SkipEmbeddedDocumentSelector;
import org.apache.tika.metadata.filter.AttachmentCountingListFilter;
import org.apache.tika.metadata.filter.CompositeMetadataFilter;
+import org.apache.tika.metadata.filter.IncludeFieldMetadataFilter;
import org.apache.tika.metadata.filter.MetadataFilter;
import org.apache.tika.metadata.filter.MockUpperCaseFilter;
import org.apache.tika.parser.ParseContext;
@@ -327,7 +329,9 @@ public class TestParseContextSerialization {
@Test
public void testMetadataListPOJO() throws Exception {
- CompositeMetadataFilter metadataFilter = new CompositeMetadataFilter(List.of(new AttachmentCountingListFilter(), new MockUpperCaseFilter()));
+ CompositeMetadataFilter metadataFilter =
+ new CompositeMetadataFilter(List.of(new MockUpperCaseFilter(), new AttachmentCountingListFilter(),
+ new IncludeFieldMetadataFilter(Set.of("blah", "blah2"))));
ParseContext parseContext = new ParseContext();
parseContext.set(MetadataFilter.class, metadataFilter);
@@ -340,7 +344,9 @@ public class TestParseContextSerialization {
assertNotNull(resolvedFilter, "MetadataFilter should be resolved");
assertEquals(CompositeMetadataFilter.class, resolvedFilter.getClass());
CompositeMetadataFilter deserFilter = (CompositeMetadataFilter) resolvedFilter;
- assertEquals(AttachmentCountingListFilter.class, deserFilter.getFilters().get(0).getClass());
+ assertEquals(MockUpperCaseFilter.class, deserFilter.getFilters().get(0).getClass());
+ assertEquals(AttachmentCountingListFilter.class, deserFilter.getFilters().get(1).getClass());
+ assertEquals(IncludeFieldMetadataFilter.class, deserFilter.getFilters().get(2).getClass());
}
@Test