This is an automated email from the ASF dual-hosted git repository.
apupier pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/camel.git
The following commit(s) were added to refs/heads/main by this push:
new a79dc22489dd Add basic tests for docling extract_structured_data
a79dc22489dd is described below
commit a79dc22489dd16471474b0ee0c503c8ec2878fc9
Author: Aurélien Pupier <[email protected]>
AuthorDate: Fri Jan 23 16:33:45 2026 +0100
Add basic tests for docling extract_structured_data
Signed-off-by: Aurélien Pupier <[email protected]>
---
.../{ => integration}/BatchProcessingIT.java | 20 ++--
.../docling/integration/DoclingITestSupport.java | 50 +++++++++
.../integration/DoclingServeProducerIT.java | 30 +-----
.../integration/ExtractStructuredDataIT.java | 118 +++++++++++++++++++++
.../docling/integration/MetadataExtractionIT.java | 30 +-----
.../src/test/resources/picture_classification.pdf | Bin 0 -> 212855 bytes
6 files changed, 177 insertions(+), 71 deletions(-)
diff --git a/components/camel-ai/camel-docling/src/test/java/org/apache/camel/component/docling/BatchProcessingIT.java b/components/camel-ai/camel-docling/src/test/java/org/apache/camel/component/docling/integration/BatchProcessingIT.java
similarity index 96%
rename from components/camel-ai/camel-docling/src/test/java/org/apache/camel/component/docling/BatchProcessingIT.java
rename to components/camel-ai/camel-docling/src/test/java/org/apache/camel/component/docling/integration/BatchProcessingIT.java
index f8d05dfb662a..7a8bec034b94 100644
--- a/components/camel-ai/camel-docling/src/test/java/org/apache/camel/component/docling/BatchProcessingIT.java
+++ b/components/camel-ai/camel-docling/src/test/java/org/apache/camel/component/docling/integration/BatchProcessingIT.java
@@ -14,7 +14,7 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
-package org.apache.camel.component.docling;
+package org.apache.camel.component.docling.integration;
import java.nio.file.Files;
import java.nio.file.Path;
@@ -23,13 +23,12 @@ import java.util.List;
import org.apache.camel.CamelContext;
import org.apache.camel.builder.RouteBuilder;
-import org.apache.camel.test.infra.docling.services.DoclingService;
-import org.apache.camel.test.infra.docling.services.DoclingServiceFactory;
-import org.apache.camel.test.junit5.CamelTestSupport;
+import org.apache.camel.component.docling.BatchConversionResult;
+import org.apache.camel.component.docling.BatchProcessingResults;
+import org.apache.camel.component.docling.DoclingComponent;
+import org.apache.camel.component.docling.DoclingConfiguration;
+import org.apache.camel.component.docling.DoclingHeaders;
import org.junit.jupiter.api.Test;
-import org.junit.jupiter.api.extension.RegisterExtension;
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
import static org.junit.jupiter.api.Assertions.assertEquals;
import static org.junit.jupiter.api.Assertions.assertNotNull;
@@ -39,12 +38,7 @@ import static org.junit.jupiter.api.Assertions.fail;
/**
* Integration test for batch processing operations using test-infra for
container management.
*/
-public class BatchProcessingIT extends CamelTestSupport {
-
- private static final Logger LOG =
LoggerFactory.getLogger(BatchProcessingIT.class);
-
- @RegisterExtension
- static DoclingService doclingService =
DoclingServiceFactory.createService();
+class BatchProcessingIT extends DoclingITestSupport {
@Test
public void testBatchConvertToMarkdown() throws Exception {
diff --git a/components/camel-ai/camel-docling/src/test/java/org/apache/camel/component/docling/integration/DoclingITestSupport.java b/components/camel-ai/camel-docling/src/test/java/org/apache/camel/component/docling/integration/DoclingITestSupport.java
new file mode 100644
index 000000000000..458f0bcaab8e
--- /dev/null
+++ b/components/camel-ai/camel-docling/src/test/java/org/apache/camel/component/docling/integration/DoclingITestSupport.java
@@ -0,0 +1,50 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.camel.component.docling.integration;
+
+import org.apache.camel.CamelContext;
+import org.apache.camel.component.docling.DoclingComponent;
+import org.apache.camel.component.docling.DoclingConfiguration;
+import org.apache.camel.test.infra.docling.services.DoclingService;
+import org.apache.camel.test.infra.docling.services.DoclingServiceFactory;
+import org.apache.camel.test.junit5.CamelTestSupport;
+import org.junit.jupiter.api.extension.RegisterExtension;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+public abstract class DoclingITestSupport extends CamelTestSupport {
+
+ protected static final Logger LOG =
LoggerFactory.getLogger(DoclingITestSupport.class);
+
+ @RegisterExtension
+ static DoclingService doclingService =
DoclingServiceFactory.createService();
+
+ @Override
+ protected CamelContext createCamelContext() throws Exception {
+ CamelContext context = super.createCamelContext();
+ DoclingComponent docling = context.getComponent("docling",
DoclingComponent.class);
+ DoclingConfiguration conf = new DoclingConfiguration();
+ conf.setUseDoclingServe(true);
+ conf.setDoclingServeUrl(doclingService.doclingServerUrl());
+ docling.setConfiguration(conf);
+
+ LOG.info("Testing Docling-Serve at: {}",
doclingService.doclingServerUrl());
+
+ return context;
+ }
+
+}
diff --git a/components/camel-ai/camel-docling/src/test/java/org/apache/camel/component/docling/integration/DoclingServeProducerIT.java b/components/camel-ai/camel-docling/src/test/java/org/apache/camel/component/docling/integration/DoclingServeProducerIT.java
index f48e9fa1b0c7..859eb0b02f6f 100644
--- a/components/camel-ai/camel-docling/src/test/java/org/apache/camel/component/docling/integration/DoclingServeProducerIT.java
+++ b/components/camel-ai/camel-docling/src/test/java/org/apache/camel/component/docling/integration/DoclingServeProducerIT.java
@@ -20,22 +20,13 @@ import java.io.File;
import java.nio.file.Files;
import java.nio.file.Path;
-import org.apache.camel.CamelContext;
import org.apache.camel.builder.RouteBuilder;
import org.apache.camel.component.docling.ConversionStatus;
-import org.apache.camel.component.docling.DoclingComponent;
-import org.apache.camel.component.docling.DoclingConfiguration;
import org.apache.camel.component.docling.DoclingHeaders;
import org.apache.camel.component.docling.DoclingOperations;
-import org.apache.camel.test.infra.docling.services.DoclingService;
-import org.apache.camel.test.infra.docling.services.DoclingServiceFactory;
-import org.apache.camel.test.junit5.CamelTestSupport;
import org.junit.jupiter.api.Test;
import org.junit.jupiter.api.condition.DisabledIfSystemProperty;
-import org.junit.jupiter.api.extension.RegisterExtension;
import org.junit.jupiter.api.io.TempDir;
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
import static org.junit.jupiter.api.Assertions.assertNotNull;
import static org.junit.jupiter.api.Assertions.assertTrue;
@@ -48,30 +39,11 @@ import static org.junit.jupiter.api.Assertions.fail;
* container for testing without manual setup.
*/
@DisabledIfSystemProperty(named = "ci.env.name", matches = ".*",
disabledReason = "Too much resources on GitHub Actions")
-public class DoclingServeProducerIT extends CamelTestSupport {
-
- private static final Logger LOG =
LoggerFactory.getLogger(DoclingServeProducerIT.class);
-
- @RegisterExtension
- static DoclingService doclingService =
DoclingServiceFactory.createService();
+class DoclingServeProducerIT extends DoclingITestSupport {
@TempDir
Path outputDir;
- @Override
- protected CamelContext createCamelContext() throws Exception {
- CamelContext context = super.createCamelContext();
- DoclingComponent docling = context.getComponent("docling",
DoclingComponent.class);
- DoclingConfiguration conf = new DoclingConfiguration();
- conf.setUseDoclingServe(true);
- conf.setDoclingServeUrl(doclingService.doclingServerUrl());
- docling.setConfiguration(conf);
-
- LOG.info("Testing Docling-Serve at: {}",
doclingService.doclingServerUrl());
-
- return context;
- }
-
@Test
public void testMarkdownConversionWithDoclingServe() throws Exception {
Path testFile = createTestFile();
diff --git a/components/camel-ai/camel-docling/src/test/java/org/apache/camel/component/docling/integration/ExtractStructuredDataIT.java b/components/camel-ai/camel-docling/src/test/java/org/apache/camel/component/docling/integration/ExtractStructuredDataIT.java
new file mode 100644
index 000000000000..703fdcb27778
--- /dev/null
+++ b/components/camel-ai/camel-docling/src/test/java/org/apache/camel/component/docling/integration/ExtractStructuredDataIT.java
@@ -0,0 +1,118 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.camel.component.docling.integration;
+
+import java.io.IOException;
+import java.io.InputStream;
+import java.nio.file.Files;
+import java.nio.file.Path;
+import java.nio.file.StandardCopyOption;
+import java.util.List;
+
+import ai.docling.core.DoclingDocument;
+import ai.docling.core.DoclingDocument.PictureItem;
+import ai.docling.core.DoclingDocument.TableData;
+import ai.docling.core.DoclingDocument.TableItem;
+import com.fasterxml.jackson.databind.ObjectMapper;
+import org.apache.camel.builder.RouteBuilder;
+import org.apache.camel.component.docling.DoclingHeaders;
+import org.apache.camel.component.docling.DoclingOperations;
+import org.junit.jupiter.api.Test;
+import org.junit.jupiter.api.condition.DisabledIfSystemProperty;
+
+import static org.assertj.core.api.Assertions.assertThat;
+
+@DisabledIfSystemProperty(named = "ci.env.name", matches = ".*",
disabledReason = "Too much resources on GitHub Actions")
+class ExtractStructuredDataIT extends DoclingITestSupport {
+
+ @Test
+ void extractTableFromMarkdown() throws Exception {
+ Path testFile = createTestFile();
+
+ String result =
template.requestBodyAndHeader("direct:extract-structured-data",
+ testFile.toString(),
+ DoclingHeaders.OPERATION,
DoclingOperations.EXTRACT_STRUCTURED_DATA, String.class);
+ ObjectMapper mapper = new ObjectMapper();
+ DoclingDocument doclingDocument = mapper.readValue(result,
DoclingDocument.class);
+
+ List<TableItem> tables = doclingDocument.getTables();
+ assertThat(tables).hasSize(1);
+ TableData table = tables.get(0).getData();
+ assertThat(table.getNumCols()).isEqualTo(3);
+ assertThat(table.getNumRows()).isEqualTo(4);
+ assertThat(table.getGrid().get(1).get(2).getText()).isEqualTo("C1");
+ }
+
+ @Test
+ void extractImageFromPDF() throws Exception {
+ Path testFile = createTestPdfFile();
+
+ String result =
template.requestBodyAndHeader("direct:extract-structured-data",
+ testFile.toString(),
+ DoclingHeaders.OPERATION,
DoclingOperations.EXTRACT_STRUCTURED_DATA, String.class);
+ ObjectMapper mapper = new ObjectMapper();
+ DoclingDocument doclingDocument = mapper.readValue(result,
DoclingDocument.class);
+
+ List<PictureItem> pictures = doclingDocument.getPictures();
+ assertThat(pictures).hasSize(2);
+ }
+
+ private Path createTestFile() throws Exception {
+ Path tempFile =
Files.createTempFile("docling-extract-structureddata-test-", ".md");
+ String content = """
+ # Test Document
+
+ This is a test document for structured data
+
+ ## Section 1
+
+ Some content here.
+
+ - List item 1
+ - List item 2
+
+ ## Section 2
+
+ | A | B | C |
+ |---|---|---|
+ | A1 | B1 | C1 |
+ | A2| B2 | C2 |
+ | A3 | B3 | C3 |
+ """;
+ Files.write(tempFile, content.getBytes());
+ return tempFile;
+ }
+
+ private Path createTestPdfFile() throws IOException {
+ try (InputStream is =
getClass().getClassLoader().getResourceAsStream("picture_classification.pdf")) {
+ java.nio.file.Path tempFile =
Files.createTempFile("docling-test-picture_classification", ".pdf");
+ Files.copy(is, tempFile.toAbsolutePath(),
StandardCopyOption.REPLACE_EXISTING);
+ return tempFile;
+ }
+ }
+
+ @Override
+ protected RouteBuilder createRouteBuilder() throws Exception {
+ return new RouteBuilder() {
+ @Override
+ public void configure() throws Exception {
+ from("direct:extract-structured-data")
+
.to("docling:convert?operation=EXTRACT_STRUCTURED_DATA&contentInBody=true");
+ }
+ };
+ }
+}
diff --git a/components/camel-ai/camel-docling/src/test/java/org/apache/camel/component/docling/integration/MetadataExtractionIT.java b/components/camel-ai/camel-docling/src/test/java/org/apache/camel/component/docling/integration/MetadataExtractionIT.java
index 0e9003801d99..5b051a62a91e 100644
--- a/components/camel-ai/camel-docling/src/test/java/org/apache/camel/component/docling/integration/MetadataExtractionIT.java
+++ b/components/camel-ai/camel-docling/src/test/java/org/apache/camel/component/docling/integration/MetadataExtractionIT.java
@@ -23,19 +23,10 @@ import java.nio.file.Path;
import java.nio.file.StandardCopyOption;
import java.util.Map;
-import org.apache.camel.CamelContext;
import org.apache.camel.builder.RouteBuilder;
-import org.apache.camel.component.docling.DoclingComponent;
-import org.apache.camel.component.docling.DoclingConfiguration;
import org.apache.camel.component.docling.DocumentMetadata;
-import org.apache.camel.test.infra.docling.services.DoclingService;
-import org.apache.camel.test.infra.docling.services.DoclingServiceFactory;
-import org.apache.camel.test.junit5.CamelTestSupport;
import org.junit.jupiter.api.Test;
import org.junit.jupiter.api.condition.DisabledIfSystemProperty;
-import org.junit.jupiter.api.extension.RegisterExtension;
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
import static org.assertj.core.api.Assertions.assertThat;
import static org.junit.jupiter.api.Assertions.assertFalse;
@@ -46,26 +37,7 @@ import static org.junit.jupiter.api.Assertions.assertTrue;
* Integration test for metadata extraction operations using test-infra for
container management.
*/
@DisabledIfSystemProperty(named = "ci.env.name", matches = ".*",
disabledReason = "Too much resources on GitHub Actions")
-public class MetadataExtractionIT extends CamelTestSupport {
-
- private static final Logger LOG =
LoggerFactory.getLogger(MetadataExtractionIT.class);
-
- @RegisterExtension
- static DoclingService doclingService =
DoclingServiceFactory.createService();
-
- @Override
- protected CamelContext createCamelContext() throws Exception {
- CamelContext context = super.createCamelContext();
- DoclingComponent docling = context.getComponent("docling",
DoclingComponent.class);
- DoclingConfiguration conf = new DoclingConfiguration();
- conf.setUseDoclingServe(true);
- conf.setDoclingServeUrl(doclingService.doclingServerUrl());
- docling.setConfiguration(conf);
-
- LOG.info("Testing Docling-Serve metadata extraction at: {}",
doclingService.doclingServerUrl());
-
- return context;
- }
+public class MetadataExtractionIT extends DoclingITestSupport {
@Test
public void testBasicMetadataExtraction() throws Exception {
diff --git a/components/camel-ai/camel-docling/src/test/resources/picture_classification.pdf b/components/camel-ai/camel-docling/src/test/resources/picture_classification.pdf
new file mode 100644
index 000000000000..230f74fd41a8
Binary files /dev/null and b/components/camel-ai/camel-docling/src/test/resources/picture_classification.pdf differ