This is an automated email from the ASF dual-hosted git repository.
apupier pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/camel-quarkus.git
The following commit(s) were added to refs/heads/main by this push:
new e0cf724304 Provide test to extract metadata with docling
e0cf724304 is described below
commit e0cf724304dddc76b9149985df04d295d725d90a
Author: Aurélien Pupier <[email protected]>
AuthorDate: Thu Jan 22 16:34:22 2026 +0100
Provide test to extract metadata with docling
The test has a very limited scope:
https://issues.apache.org/jira/browse/CAMEL-22888 must be fixed before
more of the metadata can be tested.
Signed-off-by: Aurélien Pupier <[email protected]>
---
integration-tests/docling/pom.xml | 4 ++
.../component/docling/it/DoclingResource.java | 29 ++++++++++---
.../src/main/resources/application.properties | 2 +-
.../docling/src/main/resources/multi_page.pdf | Bin 0 -> 128322 bytes
.../quarkus/component/docling/it/DoclingTest.java | 45 ++++++++++++++++++---
5 files changed, 68 insertions(+), 12 deletions(-)
diff --git a/integration-tests/docling/pom.xml
b/integration-tests/docling/pom.xml
index 4ef99b156c..5b23a5bd60 100644
--- a/integration-tests/docling/pom.xml
+++ b/integration-tests/docling/pom.xml
@@ -43,6 +43,10 @@
<groupId>io.quarkus</groupId>
<artifactId>quarkus-resteasy</artifactId>
</dependency>
+ <dependency>
+ <groupId>io.quarkus</groupId>
+ <artifactId>quarkus-resteasy-jackson</artifactId>
+ </dependency>
<!-- test dependencies -->
<dependency>
diff --git
a/integration-tests/docling/src/main/java/org/apache/camel/quarkus/component/docling/it/DoclingResource.java
b/integration-tests/docling/src/main/java/org/apache/camel/quarkus/component/docling/it/DoclingResource.java
index 8c2c34fa50..f2196437e5 100644
---
a/integration-tests/docling/src/main/java/org/apache/camel/quarkus/component/docling/it/DoclingResource.java
+++
b/integration-tests/docling/src/main/java/org/apache/camel/quarkus/component/docling/it/DoclingResource.java
@@ -20,7 +20,9 @@ import java.io.IOException;
import java.io.InputStream;
import java.nio.charset.StandardCharsets;
import java.nio.file.Files;
+import java.nio.file.StandardCopyOption;
+import io.quarkus.runtime.annotations.RegisterForReflection;
import jakarta.enterprise.context.ApplicationScoped;
import jakarta.inject.Inject;
import jakarta.ws.rs.Consumes;
@@ -33,10 +35,12 @@ import jakarta.ws.rs.core.MediaType;
import jakarta.ws.rs.core.Response;
import org.apache.camel.CamelContext;
import org.apache.camel.ProducerTemplate;
+import org.apache.camel.component.docling.DocumentMetadata;
import org.jboss.logging.Logger;
@Path("/docling")
@ApplicationScoped
+@RegisterForReflection(targets = DocumentMetadata.class, methods = true)
public class DoclingResource {
private static final Logger LOG = Logger.getLogger(DoclingResource.class);
@@ -202,17 +206,30 @@ public class DoclingResource {
@POST
@Consumes(MediaType.TEXT_PLAIN)
@Produces(MediaType.APPLICATION_JSON)
- public Response extractMetadata(String documentContent) throws IOException
{
+ public DocumentMetadata extractMetadata(String documentContent) throws
IOException {
java.nio.file.Path tempFile = Files.createTempFile("docling-test",
".md");
Files.writeString(tempFile, documentContent);
try {
- String result =
producerTemplate.requestBody("direct:extractMetadata", tempFile.toString(),
String.class);
- return Response.ok(result).build();
- } catch (Exception e) {
- LOG.error("Failed to extract metadata", e);
- return Response.status(500).entity("Error: " +
e.getMessage()).build();
+ return producerTemplate.requestBody("direct:extractMetadata",
tempFile.toString(),
+ DocumentMetadata.class);
} finally {
Files.deleteIfExists(tempFile);
}
}
+
+ @Path("/metadata/extract/pdf")
+ @POST
+ @Produces(MediaType.APPLICATION_JSON)
+ public DocumentMetadata extractMetadataFromPdf() throws IOException {
+ try (InputStream is =
getClass().getClassLoader().getResourceAsStream("multi_page.pdf")) {
+ java.nio.file.Path tempFile =
Files.createTempFile("docling-test-multi_page", ".pdf");
+ Files.copy(is, tempFile.toAbsolutePath(),
StandardCopyOption.REPLACE_EXISTING);
+ try {
+ return producerTemplate.requestBody("direct:extractMetadata",
tempFile.toString(),
+ DocumentMetadata.class);
+ } finally {
+ Files.deleteIfExists(tempFile);
+ }
+ }
+ }
}
diff --git
a/integration-tests/docling/src/main/resources/application.properties
b/integration-tests/docling/src/main/resources/application.properties
index a00af82ad1..64eab96937 100644
--- a/integration-tests/docling/src/main/resources/application.properties
+++ b/integration-tests/docling/src/main/resources/application.properties
@@ -18,7 +18,7 @@
quarkus.banner.enabled=false
# Include test document resources in native image
-quarkus.native.resources.includes=test-document.txt,test-document.md
+quarkus.native.resources.includes=test-document.txt,test-document.md,multi_page.pdf
# Docling component configuration
camel.component.docling.use-docling-serve=true
diff --git a/integration-tests/docling/src/main/resources/multi_page.pdf
b/integration-tests/docling/src/main/resources/multi_page.pdf
new file mode 100644
index 0000000000..7d9eb1818d
Binary files /dev/null and
b/integration-tests/docling/src/main/resources/multi_page.pdf differ
diff --git
a/integration-tests/docling/src/test/java/org/apache/camel/quarkus/component/docling/it/DoclingTest.java
b/integration-tests/docling/src/test/java/org/apache/camel/quarkus/component/docling/it/DoclingTest.java
index f010494a09..26142543e9 100644
---
a/integration-tests/docling/src/test/java/org/apache/camel/quarkus/component/docling/it/DoclingTest.java
+++
b/integration-tests/docling/src/test/java/org/apache/camel/quarkus/component/docling/it/DoclingTest.java
@@ -20,12 +20,14 @@ import io.quarkus.test.common.QuarkusTestResource;
import io.quarkus.test.junit.QuarkusTest;
import io.restassured.RestAssured;
import io.restassured.http.ContentType;
+import org.junit.jupiter.api.Disabled;
import org.junit.jupiter.api.Test;
import static org.hamcrest.Matchers.containsString;
import static org.hamcrest.Matchers.emptyString;
import static org.hamcrest.Matchers.is;
import static org.hamcrest.Matchers.not;
+import static org.hamcrest.core.StringStartsWith.startsWith;
@QuarkusTest
@QuarkusTestResource(DoclingTestResource.class)
@@ -117,7 +119,7 @@ class DoclingTest {
}
@Test
- public void extractText() {
+ void extractText() {
String testContent = "Document with text to extract.";
RestAssured.given()
@@ -127,12 +129,38 @@ class DoclingTest {
.post("/docling/extract/text")
.then()
.statusCode(200)
- .body(not(emptyString()));
+ .body(containsString(testContent));
+ }
+
+ @Test
+ void extractMetadataFromPdf() {
+ RestAssured.given()
+ .when()
+ .post("/docling/metadata/extract/pdf")
+ .then()
+ .statusCode(200)
+ .body("fileName", startsWith("docling-test"))
+ .body("filePath", containsString("docling-test"));
+ // TODO: improve test by checking other metadata when
https://issues.apache.org/jira/browse/CAMEL-22888 is fixed
}
@Test
- public void extractMetadata() {
- String testContent = "# Test Document\nSome content for metadata
extraction.";
+ @Disabled("test to implement")
+ void extractTextFromPassordProtectedPdf() {
+ }
+
+ @Test
+ @Disabled("test to implement")
+ void extractTextWithOCROnScannedDocument() {
+ }
+
+ @Test
+ void extractMetadataFromMarkdown() {
+ String testContent = """
+ # Test Document
+
+ Some content for metadata extraction.
+ """;
RestAssured.given()
.contentType(ContentType.TEXT)
@@ -141,7 +169,9 @@ class DoclingTest {
.post("/docling/metadata/extract")
.then()
.statusCode(200)
- .body(not(emptyString()));
+ .body("fileName", startsWith("docling-test"))
+ .body("filePath", containsString("docling-test"));
+ // TODO: improve test by checking other metadata when
https://issues.apache.org/jira/browse/CAMEL-22888 is fixed
}
@Test
@@ -158,6 +188,11 @@ class DoclingTest {
.body(not(emptyString()));
}
+ @Test
+ @Disabled("test to implement")
+ void convertToMarkdownAsyncInBatch() {
+ }
+
@Test
public void convertToHtmlAsync() {
String testContent = "# Async HTML Test\nThis is a test for async HTML
conversion.";