This is an automated email from the ASF dual-hosted git repository.

apupier pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/camel-quarkus.git


The following commit(s) were added to refs/heads/main by this push:
     new e0cf724304 Provide test to extract metadata with docling
e0cf724304 is described below

commit e0cf724304dddc76b9149985df04d295d725d90a
Author: Aurélien Pupier <[email protected]>
AuthorDate: Thu Jan 22 16:34:22 2026 +0100

    Provide test to extract metadata with docling
    
    it has a very limited scope, it requires
    https://issues.apache.org/jira/browse/CAMEL-22888 to be able to test
    more
    
    Signed-off-by: Aurélien Pupier <[email protected]>
---
 integration-tests/docling/pom.xml                  |   4 ++
 .../component/docling/it/DoclingResource.java      |  29 ++++++++++---
 .../src/main/resources/application.properties      |   2 +-
 .../docling/src/main/resources/multi_page.pdf      | Bin 0 -> 128322 bytes
 .../quarkus/component/docling/it/DoclingTest.java  |  45 ++++++++++++++++++---
 5 files changed, 68 insertions(+), 12 deletions(-)

diff --git a/integration-tests/docling/pom.xml 
b/integration-tests/docling/pom.xml
index 4ef99b156c..5b23a5bd60 100644
--- a/integration-tests/docling/pom.xml
+++ b/integration-tests/docling/pom.xml
@@ -43,6 +43,10 @@
             <groupId>io.quarkus</groupId>
             <artifactId>quarkus-resteasy</artifactId>
         </dependency>
+        <dependency>
+            <groupId>io.quarkus</groupId>
+            <artifactId>quarkus-resteasy-jackson</artifactId>
+        </dependency>
 
         <!-- test dependencies -->
         <dependency>
diff --git 
a/integration-tests/docling/src/main/java/org/apache/camel/quarkus/component/docling/it/DoclingResource.java
 
b/integration-tests/docling/src/main/java/org/apache/camel/quarkus/component/docling/it/DoclingResource.java
index 8c2c34fa50..f2196437e5 100644
--- 
a/integration-tests/docling/src/main/java/org/apache/camel/quarkus/component/docling/it/DoclingResource.java
+++ 
b/integration-tests/docling/src/main/java/org/apache/camel/quarkus/component/docling/it/DoclingResource.java
@@ -20,7 +20,9 @@ import java.io.IOException;
 import java.io.InputStream;
 import java.nio.charset.StandardCharsets;
 import java.nio.file.Files;
+import java.nio.file.StandardCopyOption;
 
+import io.quarkus.runtime.annotations.RegisterForReflection;
 import jakarta.enterprise.context.ApplicationScoped;
 import jakarta.inject.Inject;
 import jakarta.ws.rs.Consumes;
@@ -33,10 +35,12 @@ import jakarta.ws.rs.core.MediaType;
 import jakarta.ws.rs.core.Response;
 import org.apache.camel.CamelContext;
 import org.apache.camel.ProducerTemplate;
+import org.apache.camel.component.docling.DocumentMetadata;
 import org.jboss.logging.Logger;
 
 @Path("/docling")
 @ApplicationScoped
+@RegisterForReflection(targets = DocumentMetadata.class, methods = true)
 public class DoclingResource {
 
     private static final Logger LOG = Logger.getLogger(DoclingResource.class);
@@ -202,17 +206,30 @@ public class DoclingResource {
     @POST
     @Consumes(MediaType.TEXT_PLAIN)
     @Produces(MediaType.APPLICATION_JSON)
-    public Response extractMetadata(String documentContent) throws IOException 
{
+    public DocumentMetadata extractMetadata(String documentContent) throws 
IOException {
         java.nio.file.Path tempFile = Files.createTempFile("docling-test", 
".md");
         Files.writeString(tempFile, documentContent);
         try {
-            String result = 
producerTemplate.requestBody("direct:extractMetadata", tempFile.toString(), 
String.class);
-            return Response.ok(result).build();
-        } catch (Exception e) {
-            LOG.error("Failed to extract metadata", e);
-            return Response.status(500).entity("Error: " + 
e.getMessage()).build();
+            return producerTemplate.requestBody("direct:extractMetadata", 
tempFile.toString(),
+                    DocumentMetadata.class);
         } finally {
             Files.deleteIfExists(tempFile);
         }
     }
+
+    @Path("/metadata/extract/pdf")
+    @POST
+    @Produces(MediaType.APPLICATION_JSON)
+    public DocumentMetadata extractMetadataFromPdf() throws IOException {
+        try (InputStream is = 
getClass().getClassLoader().getResourceAsStream("multi_page.pdf")) {
+            java.nio.file.Path tempFile = 
Files.createTempFile("docling-test-multi_page", ".pdf");
+            Files.copy(is, tempFile.toAbsolutePath(), 
StandardCopyOption.REPLACE_EXISTING);
+            try {
+                return producerTemplate.requestBody("direct:extractMetadata", 
tempFile.toString(),
+                        DocumentMetadata.class);
+            } finally {
+                Files.deleteIfExists(tempFile);
+            }
+        }
+    }
 }
diff --git 
a/integration-tests/docling/src/main/resources/application.properties 
b/integration-tests/docling/src/main/resources/application.properties
index a00af82ad1..64eab96937 100644
--- a/integration-tests/docling/src/main/resources/application.properties
+++ b/integration-tests/docling/src/main/resources/application.properties
@@ -18,7 +18,7 @@
 quarkus.banner.enabled=false
 
 # Include test document resources in native image
-quarkus.native.resources.includes=test-document.txt,test-document.md
+quarkus.native.resources.includes=test-document.txt,test-document.md,multi_page.pdf
 
 # Docling component configuration
 camel.component.docling.use-docling-serve=true
diff --git a/integration-tests/docling/src/main/resources/multi_page.pdf 
b/integration-tests/docling/src/main/resources/multi_page.pdf
new file mode 100644
index 0000000000..7d9eb1818d
Binary files /dev/null and 
b/integration-tests/docling/src/main/resources/multi_page.pdf differ
diff --git 
a/integration-tests/docling/src/test/java/org/apache/camel/quarkus/component/docling/it/DoclingTest.java
 
b/integration-tests/docling/src/test/java/org/apache/camel/quarkus/component/docling/it/DoclingTest.java
index f010494a09..26142543e9 100644
--- 
a/integration-tests/docling/src/test/java/org/apache/camel/quarkus/component/docling/it/DoclingTest.java
+++ 
b/integration-tests/docling/src/test/java/org/apache/camel/quarkus/component/docling/it/DoclingTest.java
@@ -20,12 +20,14 @@ import io.quarkus.test.common.QuarkusTestResource;
 import io.quarkus.test.junit.QuarkusTest;
 import io.restassured.RestAssured;
 import io.restassured.http.ContentType;
+import org.junit.jupiter.api.Disabled;
 import org.junit.jupiter.api.Test;
 
 import static org.hamcrest.Matchers.containsString;
 import static org.hamcrest.Matchers.emptyString;
 import static org.hamcrest.Matchers.is;
 import static org.hamcrest.Matchers.not;
+import static org.hamcrest.core.StringStartsWith.startsWith;
 
 @QuarkusTest
 @QuarkusTestResource(DoclingTestResource.class)
@@ -117,7 +119,7 @@ class DoclingTest {
     }
 
     @Test
-    public void extractText() {
+    void extractText() {
         String testContent = "Document with text to extract.";
 
         RestAssured.given()
@@ -127,12 +129,38 @@ class DoclingTest {
                 .post("/docling/extract/text")
                 .then()
                 .statusCode(200)
-                .body(not(emptyString()));
+                .body(containsString(testContent));
+    }
+
+    @Test
+    void extractMetadataFromPdf() {
+        RestAssured.given()
+                .when()
+                .post("/docling/metadata/extract/pdf")
+                .then()
+                .statusCode(200)
+                .body("fileName", startsWith("docling-test"))
+                .body("filePath", containsString("docling-test"));
+        // TODO: improve test by checking other metadatas when 
https://issues.apache.org/jira/browse/CAMEL-22888 is fixed
     }
 
     @Test
-    public void extractMetadata() {
-        String testContent = "# Test Document\nSome content for metadata 
extraction.";
+    @Disabled("test to implement")
+    void extractTextFromPassordProtectedPdf() {
+    }
+
+    @Test
+    @Disabled("test to implement")
+    void extractTextWithOCROnScannedDocument() {
+    }
+
+    @Test
+    void extractMetadataFromMarkdown() {
+        String testContent = """
+                # Test Document
+
+                Some content for metadata extraction.
+                """;
 
         RestAssured.given()
                 .contentType(ContentType.TEXT)
@@ -141,7 +169,9 @@ class DoclingTest {
                 .post("/docling/metadata/extract")
                 .then()
                 .statusCode(200)
-                .body(not(emptyString()));
+                .body("fileName", startsWith("docling-test"))
+                .body("filePath", containsString("docling-test"));
+        // TODO: improve test by checking other metadatas when 
https://issues.apache.org/jira/browse/CAMEL-22888 is fixed
     }
 
     @Test
@@ -158,6 +188,11 @@ class DoclingTest {
                 .body(not(emptyString()));
     }
 
+    @Test
+    @Disabled("test to implement")
+    void convertToMarkdownAsyncInBatch() {
+    }
+
     @Test
     public void convertToHtmlAsync() {
         String testContent = "# Async HTML Test\nThis is a test for async HTML 
conversion.";

Reply via email to